def build_sad(self, job, data_ops,
              ensemble_rc=False, ensemble_shifts=[0],
              embed_penultimate=False, target_subset=None):
  """Build SAD predict ops."""
  if not self.hparams_set:
    self.hp = params.make_hparams(job)
    self.hparams_set = True

  # training conditional
  self.is_training = tf.placeholder(tf.bool, name='is_training')

  # eval data ops w/ deterministic augmentation
  data_ops_eval = augmentation.augment_deterministic_set(
      data_ops, ensemble_rc, ensemble_shifts)
  data_seq_eval = tf.stack([do['sequence'] for do in data_ops_eval])
  data_rev_eval = tf.stack([do['reverse_preds'] for do in data_ops_eval])

  # compute eval representation
  map_elems_eval = (data_seq_eval, data_rev_eval)
  build_rep = lambda do: self.build_predict(do[0], do[1],
                                            embed_penultimate, target_subset)
  self.preds_ensemble = tf.map_fn(
      build_rep, map_elems_eval, dtype=tf.float32, back_prop=False)
  self.preds_eval = tf.reduce_mean(self.preds_ensemble, axis=0)

  # update # targets
  if target_subset is not None:
    self.hp.num_targets = len(target_subset)

  # helper variables
  self.preds_length = self.preds_eval.shape[1]
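# A minimal, self-contained sketch of the ensembling pattern used in build_sad
# above: tf.map_fn applies the prediction fn to each augmented copy (stacked on
# axis 0) and tf.reduce_mean averages over the ensemble. `toy_predict` stands in
# for build_predict; all names and shapes here are illustrative, not this
# class's actual API.
import tensorflow as tf

def toy_predict(seq, reverse):
  # fake "prediction": sum the one-hot channels, optionally flip along length
  preds = tf.reduce_sum(seq, axis=-1)
  return tf.cond(reverse, lambda: tf.reverse(preds, axis=[1]), lambda: preds)

# ensemble of 3 augmented copies of a [batch, length, 4] input
seqs_aug = tf.placeholder(tf.float32, shape=(3, None, None, 4), name='seqs_aug')
rev_flags = tf.placeholder(tf.bool, shape=(3,), name='reverse_flags')

# map the prediction fn over the ensemble axis, without backprop through it
preds_ensemble = tf.map_fn(lambda do: toy_predict(do[0], do[1]),
                           (seqs_aug, rev_flags),
                           dtype=tf.float32, back_prop=False)

# average over the ensemble axis, as build_sad does for preds_eval
preds_avg = tf.reduce_mean(preds_ensemble, axis=0)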
def build_feed(self, job,
               augment_rc=False, augment_shifts=[0],
               ensemble_rc=False, ensemble_shifts=[0],
               embed_penultimate=False, target_subset=None):
  """Build training ops that depend on placeholders."""
  self.hp = params.make_hparams(job)
  self.hparams_set = True
  data_ops = self.make_placeholders()

  self.build_from_data_ops(
      job, data_ops,
      augment_rc=augment_rc, augment_shifts=augment_shifts,
      ensemble_rc=ensemble_rc, ensemble_shifts=ensemble_shifts,
      embed_penultimate=embed_penultimate, target_subset=target_subset)
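# A hedged sketch of driving the feed-based training graph built above. It
# assumes an instantiable class (written as SeqNN here), a parsed `job` params
# dict, placeholder attributes created by make_placeholders() (inputs_ph,
# targets_ph), and that build_optimizer() stores its train op as step_op.
# None of those attribute names are confirmed by the code in this section.
import tensorflow as tf

model = SeqNN()
model.build_feed(job, augment_rc=True, augment_shifts=[-2, 0, 2])

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())

  # batch_seqs / batch_targets: one batch from the data pipeline (illustrative)
  feed_dict = {
      model.inputs_ph: batch_seqs,      # [batch, seq_length, 4] one-hot
      model.targets_ph: batch_targets,  # [batch, target_length, num_targets]
      model.is_training: True,
  }

  # one stochastically augmented training step
  loss, _ = sess.run([model.loss_train, model.step_op], feed_dict=feed_dict)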
def build(self, job, target_subset=None):
  """Build training ops that depend on placeholders."""
  self.hp = params.make_hparams(job)
  self.hparams_set = True
  data_ops = self.make_placeholders()
  self.build_from_data_ops(job, data_ops, target_subset=target_subset)
def build_feed(self, job, embed_penultimate=False, target_subset=None):
  """Build training ops that depend on placeholders."""
  self.hp = params.make_hparams(job)
  self.hparams_set = True
  data_ops = self.make_placeholders()
  self.build_from_data_ops(job, data_ops,
                           embed_penultimate=embed_penultimate,
                           target_subset=target_subset)
def build_from_data_ops(self, job, data_ops, target_subset=None):
  """Build training ops from input data ops."""
  if not self.hparams_set:
    self.hp = params.make_hparams(job)
    self.hparams_set = True

  self.targets = data_ops['label']
  self.inputs = data_ops['sequence']
  self.targets_na = data_ops['na']

  seqs_repr = self.build_representation(data_ops, target_subset)
  self.loss_op, self.loss_adhoc = self.build_loss(
      seqs_repr, data_ops, target_subset)
  self.build_optimizer(self.loss_op)
def build_feed_sad(self, job,
                   ensemble_rc=False, ensemble_shifts=[0],
                   embed_penultimate=False, target_subset=None):
  """Build SAD predict ops that depend on placeholders."""
  self.hp = params.make_hparams(job)
  self.hparams_set = True
  data_ops = self.make_placeholders()

  self.build_sad(job, data_ops,
                 ensemble_rc=ensemble_rc, ensemble_shifts=ensemble_shifts,
                 embed_penultimate=embed_penultimate,
                 target_subset=target_subset)
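# A hedged usage sketch for the feed-based SAD graph built above. It assumes an
# instantiable class (written as SeqNN here), a parsed `job` params dict, and
# that make_placeholders() exposes the sequence placeholder as `inputs_ph`;
# those names are assumptions, not confirmed by the code in this section.
import tensorflow as tf

model = SeqNN()
model.build_feed_sad(job, ensemble_rc=True, ensemble_shifts=[-1, 0, 1])

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())

  # seqs_1hot: [batch, seq_length, 4] one-hot encoded sequences (illustrative)
  feed_dict = {
      model.inputs_ph: seqs_1hot,
      model.is_training: False,
  }

  # preds_eval is the average over the reverse-complement / shift ensemble
  preds = sess.run(model.preds_eval, feed_dict=feed_dict)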
def build_from_data_ops(self, job, data_ops,
                        embed_penultimate=False, target_subset=None):
  """Build training ops from input data ops."""
  if not self.hparams_set:
    self.hp = params.make_hparams(job)
    self.hparams_set = True

  # training conditional
  self.is_training = tf.placeholder(tf.bool, name='is_training')

  ##################################################
  # training

  # training data_ops w/ stochastic augmentation
  data_ops_train = augmentation.augment_stochastic(
      data_ops, job['augment_rc'], job['augment_shifts'])

  # compute train representation
  self.preds_train = self.build_predict(data_ops_train['sequence'], None,
                                        embed_penultimate, target_subset,
                                        save_reprs=True)
  self.target_length = self.preds_train.shape[1].value

  # training losses
  if not embed_penultimate:
    loss_returns = self.build_loss(self.preds_train, data_ops_train['label'],
                                   target_subset)
    self.loss_train, self.loss_train_targets, self.targets_train = loss_returns

  # optimizer
  self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  self.build_optimizer(self.loss_train)

  ##################################################
  # eval

  # eval data ops w/ deterministic augmentation
  data_ops_eval = augmentation.augment_deterministic_set(
      data_ops, job['ensemble_rc'], job['ensemble_shifts'])
  data_seq_eval = tf.stack([do['sequence'] for do in data_ops_eval])
  data_rev_eval = tf.stack([do['reverse_preds'] for do in data_ops_eval])

  # compute eval representation
  map_elems_eval = (data_seq_eval, data_rev_eval)
  build_rep = lambda do: self.build_predict(do[0], do[1],
                                            embed_penultimate, target_subset)
  self.preds_ensemble = tf.map_fn(
      build_rep, map_elems_eval, dtype=tf.float32, back_prop=False)
  self.preds_eval = tf.reduce_mean(self.preds_ensemble, axis=0)

  # eval loss
  if not embed_penultimate:
    loss_returns = self.build_loss(self.preds_eval, data_ops['label'],
                                   target_subset)
    self.loss_eval, self.loss_eval_targets, self.targets_eval = loss_returns

  # update # targets
  if target_subset is not None:
    self.hp.num_targets = len(target_subset)

  # helper variables
  self.preds_length = self.preds_train.shape[1]
def build_from_data_ops(self, job, data_ops,
                        augment_rc=False, augment_shifts=[0],
                        ensemble_rc=False, ensemble_shifts=[0],
                        embed_penultimate=False, target_subset=None):
  """Build training ops from input data ops."""
  if not self.hparams_set:
    self.hp = params.make_hparams(job)
    self.hparams_set = True

  # training conditional
  self.is_training = tf.placeholder(tf.bool, name="is_training")

  ##################################################
  # training

  # training data_ops w/ stochastic augmentation
  data_ops_train = augmentation.augment_stochastic(
      data_ops, augment_rc, augment_shifts)

  # compute train representation
  self.preds_train = self.build_predict(
      data_ops_train["sequence"], None,
      embed_penultimate, target_subset, save_reprs=True)
  self.target_length = self.preds_train.shape[1].value

  # training losses
  if not embed_penultimate:
    loss_returns = self.build_loss(
        self.preds_train, data_ops_train["label"],
        data_ops.get("genome", None), target_subset)
    self.loss_train, self.loss_train_targets, self.targets_train = loss_returns[:3]

  # optimizer
  self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  self.build_optimizer(self.loss_train)

  # allegedly correct, but outperformed by skipping
  # with tf.control_dependencies(self.update_ops):
  #   self.build_optimizer(self.loss_train)

  ##################################################
  # eval

  # eval data ops w/ deterministic augmentation
  data_ops_eval = augmentation.augment_deterministic_set(
      data_ops, ensemble_rc, ensemble_shifts)
  data_seq_eval = tf.stack([do["sequence"] for do in data_ops_eval])
  data_rev_eval = tf.stack([do["reverse_preds"] for do in data_ops_eval])

  # compute eval representation
  map_elems_eval = (data_seq_eval, data_rev_eval)
  build_rep = lambda do: self.build_predict(do[0], do[1],
                                            embed_penultimate, target_subset)
  self.preds_ensemble = tf.map_fn(
      build_rep, map_elems_eval, dtype=tf.float32, back_prop=False)
  self.preds_eval = tf.reduce_mean(self.preds_ensemble, axis=0)

  # eval loss and metrics
  if not embed_penultimate:
    loss_returns = self.build_loss(
        self.preds_eval, data_ops["label"],
        data_ops.get("genome", None), target_subset)
    (self.loss_eval, self.loss_eval_targets,
     self.targets_eval, self.preds_eval_loss) = loss_returns

  # update # targets
  if target_subset is not None:
    self.hp.num_targets = len(target_subset)

  # helper variables
  self.preds_length = self.preds_train.shape[1].value
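# The commented-out control_dependencies block above refers to the standard
# TF1 recipe for running batch-norm moving-average updates with each train
# step. A self-contained sketch of that recipe, independent of this class
# (layer sizes and the optimizer are illustrative):
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=(None, 128), name='x')
y = tf.placeholder(tf.float32, shape=(None, 1), name='y')
is_training = tf.placeholder(tf.bool, name='is_training')

# batch norm registers its moving mean/variance updates in UPDATE_OPS
h = tf.layers.batch_normalization(x, training=is_training)
preds = tf.layers.dense(h, 1)
loss = tf.losses.mean_squared_error(y, preds)

# the "allegedly correct" variant: make the train step depend on those updates
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
  train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)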