def __init__(self,
             masked_lm_prob=0.15,
             mask_token_prob=0.80,
             resample_token_prob=0.50,
             deterministic_n_lm_tokens=True,
             **kwargs):
  super().__init__(**kwargs)
  self._masked_lm_prob = masked_lm_prob
  self._mask_token_prob = mask_token_prob
  self._resample_token_prob = resample_token_prob
  self._deterministic_n_lm_tokens = deterministic_n_lm_tokens
  # tf.where-based "branching" for BERT's Cloze task.
  self._branch1_sampler = (
      tfp.distributions.Uniform() if self._deterministic_n_lm_tokens
      else tfp.distributions.Bernoulli(
          probs=self._masked_lm_prob, dtype=tf.bool))
  self._branch2_sampler = tfp.distributions.Bernoulli(
      probs=self._mask_token_prob, dtype=tf.bool)
  self._branch3_sampler = tfp.distributions.Bernoulli(
      probs=self._resample_token_prob, dtype=tf.bool)
  # Resample (integer-valued) tokens uniformly at random, ignoring any
  # special tokens in the vocabulary.
  self._resample_sampler = vocabulary.Sampler(self._vocab)

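# Hedged sketch (the transform's actual call method is not part of this
# excerpt) of how the branch samplers above can drive BERT-style corruption
# with tf.where. The mask code and the position-selection logic below are
# assumptions for illustration only.
mask_code = 1                                    # assumed code of the MASK token.
tokens = vocabulary.Sampler().sample((64,))      # a fake input sequence.
# Branch 1: select ~15% of positions for the LM loss. In the deterministic
# variant, ranking Uniform noise yields exactly round(0.15 * 64) positions.
noise = tfp.distributions.Uniform().sample((64,))
n_lm = int(round(0.15 * 64))
lm_positions = tf.argsort(tf.argsort(noise)) < n_lm  # bool mask, n_lm True.
# Branches 2 and 3: 80% MASK; of the remainder, 50% random token, 50% keep.
use_mask = tfp.distributions.Bernoulli(probs=0.8, dtype=tf.bool).sample((64,))
use_rand = tfp.distributions.Bernoulli(probs=0.5, dtype=tf.bool).sample((64,))
random_tokens = vocabulary.Sampler().sample((64,))
corrupted = tf.where(
    use_mask,
    tf.fill((64,), tf.cast(mask_code, tokens.dtype)),  # replace by MASK.
    tf.where(use_rand, random_tokens, tokens))          # resample or keep.
masked_inputs = tf.where(lm_positions, corrupted, tokens)
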
def make_fake_sequence_dataset(num_examples=1000):
  voc = vocabulary.alternative
  sampler = vocabulary.Sampler(voc)
  ds = tf.data.Dataset.from_tensor_slices({
      'sequence': sampler.sample((num_examples, 128)),
      'seq_key': tf.range(num_examples, dtype=tf.int32),
      'fam_key': tf.range(num_examples, 2 * num_examples, dtype=tf.int32),
  })
  return ds

def make_fake_homology_dataset(num_examples=1000, seq_len=128):
  voc = vocabulary.proteins
  sampler = vocabulary.Sampler(voc)
  return tf.data.Dataset.from_tensor_slices({
      'sequence': sampler.sample((num_examples, seq_len)),
      'target': tf.random.uniform(shape=(num_examples,)) > 0.8,
      'weights': tf.ones(shape=(num_examples, seq_len), dtype=tf.float32),
  })

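# Hedged usage sketch for the fake-dataset builders above: batch a few
# examples and inspect the fields (nothing beyond the builders is assumed).
fake_ds = make_fake_homology_dataset(num_examples=8, seq_len=16).batch(4)
for batch in fake_ds.take(1):
  print(batch['sequence'].shape)   # (4, 16), integer residue codes.
  print(batch['target'].dtype)     # tf.bool, True for roughly 20% of examples.
  print(batch['weights'].shape)    # (4, 16), all ones.
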
def __init__(self,
             max_len=512,
             len_increase_ratio=2.0,
             logits=None,
             gap_token='-',
             **kwargs):
  super().__init__(**kwargs)
  self._max_len = max_len
  self._len_increase_ratio = len_increase_ratio
  self._sampler = vocabulary.Sampler(
      vocab=self._vocab,
      logits=self.PFAM_LOGITS if logits is None else logits)
  self._gap_token = gap_token
  self._gap_code = self._vocab.get(self._gap_token)

def __init__(self, max_len=512, tau=0.01, alpha=0.05, eta=0.7, vocab=None):
  self._max_len = max_len
  vocab = vocabulary.get_default() if vocab is None else vocab
  self._sampler = vocabulary.Sampler(vocab=vocab)
  self._eos = vocab.get(vocab.specials[-1])
  self._pad = vocab.padding_code
  # Transition look-up table (excluding special initial transition).
  look_up = {
      (self.MATCH, self.MATCH): 1,
      (self.GAP_IN_X, self.MATCH): 2,
      (self.GAP_IN_Y, self.MATCH): 3,
      (self.MATCH, self.GAP_IN_X): 4,
      (self.GAP_IN_X, self.GAP_IN_X): 5,
      (self.GAP_IN_Y, self.GAP_IN_X): 9,  # "Forbidden" transition.
      (self.MATCH, self.GAP_IN_Y): 6,
      (self.GAP_IN_X, self.GAP_IN_Y): 7,
      (self.GAP_IN_Y, self.GAP_IN_Y): 8,
  }
  # Builds data structures for efficiently encoding transitions.
  self._hash_fn = lambda d0, d1: 3 * (d1 + 1) + (d0 + 1)
  hashes = [self._hash_fn(d0, d1) for (d0, d1) in look_up]
  trans_encoder = tf.scatter_nd(
      indices=[[x] for x in hashes],
      updates=list(look_up.values()),
      shape=[max(hashes) + 1])
  self._trans_encoder = tf.cast(trans_encoder, tf.int32)
  self._init_trans = tf.convert_to_tensor([self.INIT_TRANS], dtype=tf.int32)
  cond_probs = tf.convert_to_tensor(
      [[0.0, 1.0, 0.0, 0.0, 0.0],
       [0.0, 1.0 - 2.0 * alpha - tau, alpha, alpha, tau],
       [0.0, eta, 1.0 - eta - alpha, alpha, 0.0],
       [0.0, eta, 0.0, 1.0 - eta, 0.0],
       [0.0, 0.0, 0.0, 0.0, 1.0]], tf.float32)
  self._logits = tf.where(cond_probs > 0.0, tf.math.log(cond_probs), -np.inf)
  self._delta_len_x = tf.convert_to_tensor([0, 1, 0, 1, 0])
  self._delta_len_y = tf.convert_to_tensor([0, 1, 1, 0, 0])

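# Hedged illustration of the transition encoding built above: a pair of
# consecutive alignment states (prev, curr) is hashed as 3 * (curr + 1) +
# (prev + 1), and the hash is decoded into one of the nine transition codes
# via the scatter_nd look-up table. The concrete state codes below
# (MATCH=0, GAP_IN_X=1, GAP_IN_Y=2) are assumptions for illustration only.
MATCH, GAP_IN_X, GAP_IN_Y = 0, 1, 2
hash_fn = lambda d0, d1: 3 * (d1 + 1) + (d0 + 1)
look_up = {
    (MATCH, MATCH): 1, (GAP_IN_X, MATCH): 2, (GAP_IN_Y, MATCH): 3,
    (MATCH, GAP_IN_X): 4, (GAP_IN_X, GAP_IN_X): 5, (GAP_IN_Y, GAP_IN_X): 9,
    (MATCH, GAP_IN_Y): 6, (GAP_IN_X, GAP_IN_Y): 7, (GAP_IN_Y, GAP_IN_Y): 8,
}
hashes = [hash_fn(d0, d1) for (d0, d1) in look_up]
trans_encoder = tf.scatter_nd(
    indices=[[h] for h in hashes],
    updates=list(look_up.values()),
    shape=[max(hashes) + 1])
# Encoding a toy state path MATCH -> MATCH -> GAP_IN_X -> MATCH:
states = tf.constant([MATCH, MATCH, GAP_IN_X, MATCH])
pair_hashes = 3 * (states[1:] + 1) + (states[:-1] + 1)
transitions = tf.gather(trans_encoder, pair_hashes)  # -> [1, 4, 2]
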
def make_fake_dataset(num_examples=1000):
  voc = vocabulary.proteins
  sampler = vocabulary.Sampler(voc)
  ds = tf.data.Dataset.from_tensor_slices(sampler.sample((num_examples, 128)))
  return ds.map(lambda x: {'sequence': x})

def setUp(self):
  super().setUp()
  gin.clear_config()
  tf.random.set_seed(0)
  self.sampler = vocabulary.Sampler()
  self.seq = self.sampler.sample((256,))

def setUp(self):
  super().setUp()
  tf.random.set_seed(0)
  self.vocab = vocabulary.alternative
  self.sampler = vocabulary.Sampler(vocab=self.vocab)

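# Hedged sketch of a test method built on such a fixture; the shapes and
# assertions below are illustrative only.
def test_sampler_output(self):
  batch = self.sampler.sample((4, 32))
  self.assertEqual(batch.shape, (4, 32))
  self.assertTrue(bool(tf.reduce_all(batch >= 0)))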