def forward(self, src_word, trg_word):
    src_max_len = paddle.shape(src_word)[-1]
    trg_max_len = paddle.shape(trg_word)[-1]
    base_attn_bias = paddle.cast(
        src_word == self.bos_id,
        dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9
    src_slf_attn_bias = base_attn_bias
    src_slf_attn_bias.stop_gradient = True
    trg_slf_attn_bias = paddle.tensor.triu(
        (paddle.ones(
            (trg_max_len, trg_max_len),
            dtype=paddle.get_default_dtype()) * -np.inf), 1)
    trg_slf_attn_bias.stop_gradient = True
    trg_src_attn_bias = paddle.tile(base_attn_bias, [1, 1, trg_max_len, 1])
    src_pos = paddle.cast(
        src_word != self.bos_id, dtype="int64") * paddle.arange(
            start=0, end=src_max_len)
    trg_pos = paddle.cast(
        trg_word != self.bos_id, dtype="int64") * paddle.arange(
            start=0, end=trg_max_len)

    src_emb = self.src_word_embedding(src_word)
    src_pos_emb = self.src_pos_embedding(src_pos)
    src_emb = src_emb + src_pos_emb
    enc_input = F.dropout(
        src_emb, p=self.dropout,
        training=self.training) if self.dropout else src_emb

    with paddle.static.amp.fp16_guard():
        if self.waitk >= src_max_len or self.waitk == -1:
            # Full sentence
            enc_outputs = [
                self.encoder(
                    enc_input, src_mask=src_slf_attn_bias)
            ]
        else:
            # Wait-k policy
            enc_outputs = []
            for i in range(self.waitk, src_max_len + 1):
                enc_output = self.encoder(
                    enc_input[:, :i, :],
                    src_mask=src_slf_attn_bias[:, :, :, :i])
                enc_outputs.append(enc_output)

        trg_emb = self.trg_word_embedding(trg_word)
        trg_pos_emb = self.trg_pos_embedding(trg_pos)
        trg_emb = trg_emb + trg_pos_emb
        dec_input = F.dropout(
            trg_emb, p=self.dropout,
            training=self.training) if self.dropout else trg_emb
        dec_output = self.decoder(
            dec_input,
            enc_outputs,
            tgt_mask=trg_slf_attn_bias,
            memory_mask=trg_src_attn_bias)

        predict = self.linear(dec_output)

    return predict
def __init__(self,
             embedding_name=EMBEDDING_NAME_LIST[0],
             unknown_token=UNK_TOKEN,
             unknown_token_vector=None,
             extended_vocab_path=None,
             trainable=True,
             keep_extended_vocab_only=False):
    vector_path = osp.join(EMBEDDING_HOME, embedding_name + ".npz")
    if not osp.exists(vector_path):
        # download
        url = EMBEDDING_URL_ROOT + "/" + embedding_name + ".tar.gz"
        get_path_from_url(url, EMBEDDING_HOME)

    logger.info("Loading token embedding...")
    vector_np = np.load(vector_path)
    self.embedding_dim = vector_np['embedding'].shape[1]
    self.unknown_token = unknown_token
    if unknown_token_vector is not None:
        unk_vector = np.array(unknown_token_vector).astype(
            paddle.get_default_dtype())
    else:
        unk_vector = np.random.normal(
            scale=0.02,
            size=self.embedding_dim).astype(paddle.get_default_dtype())
    pad_vector = np.array(
        [0] * self.embedding_dim).astype(paddle.get_default_dtype())

    if extended_vocab_path is not None:
        embedding_table = self._extend_vocab(extended_vocab_path, vector_np,
                                             pad_vector, unk_vector,
                                             keep_extended_vocab_only)
        trainable = True
    else:
        embedding_table = self._init_without_extend_vocab(
            vector_np, pad_vector, unk_vector)

    self.vocab = Vocab.from_dict(
        self._word_to_idx, unk_token=unknown_token, pad_token=PAD_TOKEN)
    self.num_embeddings = embedding_table.shape[0]
    # import embedding
    super(TokenEmbedding, self).__init__(
        self.num_embeddings,
        self.embedding_dim,
        padding_idx=self._word_to_idx[PAD_TOKEN])
    self.weight.set_value(embedding_table)
    self.set_trainable(trainable)
    logger.info("Finish loading embedding vector.")
    s = ("Token Embedding info:"
         "\nUnknown index: {}"
         "\nUnknown token: {}"
         "\nPadding index: {}"
         "\nPadding token: {}"
         "\nShape: {}").format(
             self._word_to_idx[self.unknown_token], self.unknown_token,
             self._word_to_idx[PAD_TOKEN], PAD_TOKEN, self.weight.shape)
    logger.info(s)
def __init__(self, dim, max_position_embeddings=512):
    super().__init__()
    inv_freq = 1.0 / (10000**(paddle.arange(
        0, dim, 2, dtype=paddle.get_default_dtype()) / dim))
    t = paddle.arange(
        max_position_embeddings, dtype=paddle.get_default_dtype())
    freqs = paddle.matmul(t.unsqueeze(1), inv_freq.unsqueeze(0))
    self.register_buffer("sin", freqs.sin(), persistable=False)
    self.register_buffer("cos", freqs.cos(), persistable=False)
def _construct_input_data(self, mask_num, vocab_size, batch_size):
    prediction_scores = np.random.rand(
        mask_num, vocab_size).astype(paddle.get_default_dtype())
    seq_relationship_score = np.random.rand(
        batch_size, 2).astype(paddle.get_default_dtype())
    masked_lm_labels = np.random.randint(0, vocab_size, (mask_num, 1))
    next_sentence_labels = np.random.randint(0, 2, (batch_size, 1))
    masked_lm_scale = 1.0
    masked_lm_weights = np.random.randint(
        0, 2, (mask_num, )).astype(paddle.get_default_dtype())
    return prediction_scores, seq_relationship_score, masked_lm_labels, \
        next_sentence_labels, masked_lm_scale, masked_lm_weights
def prepare_attention_mask_for_generation(input_ids, pad_token_id, eos_token_id):
    is_pad_token_in_inputs_ids = (pad_token_id is not None) and (
        pad_token_id in input_ids)
    is_pad_token_not_equal_to_eos_token_id = (eos_token_id is None) or (
        (eos_token_id is not None) and (pad_token_id != eos_token_id))
    if is_pad_token_in_inputs_ids and is_pad_token_not_equal_to_eos_token_id:
        # Additive mask: padded positions get a large negative bias.
        attention_mask = (input_ids == pad_token_id).astype(
            paddle.get_default_dtype()) * -1e9
    else:
        attention_mask = paddle.zeros_like(
            input_ids, dtype=paddle.get_default_dtype())
    return paddle.unsqueeze(attention_mask, axis=[1, 2])
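# A minimal sketch (assumption, not from the original source) of the additive mask
# built above for a right-padded batch with pad_token_id=0: padded positions receive
# -1e9 while real tokens receive 0, and the result broadcasts over attention heads.
import paddle

input_ids = paddle.to_tensor([[5, 6, 7, 0, 0],
                              [8, 9, 0, 0, 0]])
attention_mask = (input_ids == 0).astype(paddle.get_default_dtype()) * -1e9
attention_mask = paddle.unsqueeze(attention_mask, axis=[1, 2])
print(attention_mask.shape)  # [2, 1, 1, 5]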
def forward(self, src_word):
    src_max_len = paddle.shape(src_word)[-1]
    src_slf_attn_bias = paddle.cast(
        src_word == self.bos_id,
        dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9
    trg_src_attn_bias = src_slf_attn_bias
    src_pos = paddle.cast(
        src_word != self.bos_id, dtype="int64") * paddle.arange(
            start=0, end=src_max_len)

    # Run encoder
    src_emb = self.src_word_embedding(src_word)
    src_pos_emb = self.src_pos_embedding(src_pos)
    src_emb = src_emb + src_pos_emb
    enc_input = F.dropout(
        src_emb, p=self.dropout,
        training=False) if self.dropout else src_emb
    enc_output = self.transformer.encoder(enc_input, src_slf_attn_bias)

    # Init states (caches) for transformer, need to be updated according to selected beam
    incremental_cache, static_cache = self.transformer.decoder.gen_cache(
        enc_output, do_zip=True)

    static_cache, enc_output, trg_src_attn_bias = TransformerBeamSearchDecoder.tile_beam_merge_with_batch(
        (static_cache, enc_output, trg_src_attn_bias), self.beam_size)

    rs, _ = nn.decode.dynamic_decode(
        decoder=self.decode,
        inits=incremental_cache,
        max_step_num=self.max_out_len,
        memory=enc_output,
        trg_src_attn_bias=trg_src_attn_bias,
        static_cache=static_cache,
        is_test=True)

    return rs
def __init__(self,
             root,
             loader=None,
             extensions=None,
             transform=None,
             is_valid_file=None):
    self.root = root
    self.transform = transform
    if extensions is None:
        extensions = IMG_EXTENSIONS
    classes, class_to_idx = self._find_classes(self.root)
    samples = make_dataset(self.root, class_to_idx, extensions,
                           is_valid_file)
    if len(samples) == 0:
        raise RuntimeError(
            "Found 0 files in subfolders of: " + self.root + "\n"
            "Supported extensions are: " + ",".join(extensions))

    self.loader = default_loader if loader is None else loader
    self.extensions = extensions

    self.classes = classes
    self.class_to_idx = class_to_idx
    self.samples = samples
    self.targets = [s[1] for s in samples]
    self.dtype = paddle.get_default_dtype()
def __init__(self,
             data_file=None,
             mode='train',
             transform=None,
             download=True,
             backend=None):
    assert mode.lower() in ['train', 'test'], \
        "mode should be 'train' or 'test', but got {}".format(mode)
    self.mode = mode.lower()

    if backend is None:
        backend = paddle.vision.get_image_backend()
    if backend not in ['pil', 'cv2']:
        raise ValueError(
            "Expected backend are one of ['pil', 'cv2'], but got {}".format(
                backend))
    self.backend = backend

    self._init_url_md5_flag()

    self.data_file = data_file
    if self.data_file is None:
        assert download, "data_file is not set and downloading automatically is disabled"
        self.data_file = _check_exists_and_download(
            data_file, self.data_url, self.data_md5, 'cifar', download)

    self.transform = transform

    # read dataset into memory
    self._load_data()

    self.dtype = paddle.get_default_dtype()
def prepare_input(insts, pad_id):
    src, src_length = Pad(pad_val=pad_id, ret_length=True)(
        [inst[0] for inst in insts])
    tgt, tgt_length = Pad(pad_val=pad_id, ret_length=True)(
        [inst[1] for inst in insts])
    tgt_mask = (tgt[:, :-1] != pad_id).astype(paddle.get_default_dtype())
    return src, src_length, tgt[:, :-1], tgt[:, 1:, np.newaxis], tgt_mask
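# A hypothetical call (assumption, not from the original source): two (src, tgt)
# token-id pairs are padded with pad_id=0; the target is shifted by one position so
# tgt[:, :-1] feeds the decoder and tgt[:, 1:] is the label, with tgt_mask zeroing
# padded positions of the shifted target. Assumes Pad comes from paddlenlp.data.
insts = [([2, 11, 12, 3], [2, 21, 3]),
         ([2, 13, 3], [2, 22, 23, 3])]
src, src_length, dec_in, labels, tgt_mask = prepare_input(insts, pad_id=0)
# src: (2, 4), dec_in: (2, 3), labels: (2, 3, 1), tgt_mask: (2, 3)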
def degree_norm(self, g):
    degree = g.indegree() + 1  # self loop
    norm = paddle.cast(degree, dtype=paddle.get_default_dtype())
    norm = paddle.clip(norm, min=1.0)
    norm = paddle.pow(norm, -0.5)
    norm = paddle.reshape(norm, [-1, 1])
    return norm
def forward(self, src_word):
    src_max_len = paddle.shape(src_word)[-1]
    src_slf_attn_bias = paddle.cast(
        src_word == self.bos_id,
        dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e9
    src_pos = paddle.cast(
        src_word != self.bos_id, dtype="int64") * paddle.arange(
            start=0, end=src_max_len)

    # Run encoder
    src_emb = self.src_word_embedding(src_word)
    src_pos_emb = self.src_pos_embedding(src_pos)
    src_emb = src_emb + src_pos_emb
    enc_input = F.dropout(
        src_emb, p=self.dropout,
        training=False) if self.dropout else src_emb
    enc_output = self.transformer.encoder(enc_input, src_slf_attn_bias)

    if self.use_fp16_decoding:
        enc_output = paddle.cast(enc_output, dtype="float16")

    mem_seq_lens = paddle.sum(
        paddle.cast(src_word != self.bos_id, dtype="int32"), axis=1)
    ids = self.decoding(enc_output, mem_seq_lens)

    return ids
def forward(self,
            decoder_input_ids=None,
            decoder_attention_mask=None,
            encoder_output=None,
            memory_mask=None,
            cache=None):
    if decoder_attention_mask is None:
        decoder_length = paddle.shape(decoder_input_ids)[-1]
        # Causal mask: the upper triangle is filled with -inf so future
        # positions cannot be attended to.
        decoder_attention_mask = paddle.tensor.triu(
            paddle.full(
                (decoder_length, decoder_length),
                -np.inf,
                dtype=paddle.get_default_dtype()), 1)
    decoder_inputs_embeds = self.embed_tokens(decoder_input_ids)
    past_key_values_length = paddle.shape(
        cache[0][0].k)[2] if cache is not None else 0
    decoder_inputs_embed_pos = self.decoder_embed_positions(
        decoder_input_ids.shape, past_key_values_length)
    hidden_states = decoder_inputs_embeds + decoder_inputs_embed_pos
    hidden_states = self.decoder_layernorm_embedding(hidden_states)
    decoder_input = self.decoder_dropout(hidden_states)

    decoder_output = self.decoder(
        tgt=decoder_input,
        memory=encoder_output,
        tgt_mask=decoder_attention_mask,
        memory_mask=memory_mask,
        cache=cache)
    return decoder_output
def dequantize(quantized, n_bands, dtype=None):
    """Linearly dequantize an integer Tensor into a float Tensor in the range [-1, 1).

    Parameters
    ----------
    quantized : Tensor [dtype: int]
        The quantized value in the range [0, n_bands).
    n_bands : int
        Number of bands. The input integer Tensor's value is in the range
        [0, n_bands).
    dtype : str, optional
        Data type of the output.

    Returns
    -------
    Tensor
        The dequantized tensor, dtype is specified by `dtype`. If `dtype` is
        not specified, the default float data type is used.
    """
    dtype = dtype or paddle.get_default_dtype()
    value = (paddle.cast(quantized, dtype) + 0.5) * (2.0 / n_bands) - 1.0
    return value
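# A minimal round-trip sketch (assumption, not from the original source): quantize a
# float signal in [-1, 1) to n_bands integer levels, then recover it with dequantize;
# the reconstruction error is bounded by 1 / n_bands.
import paddle

n_bands = 256
x = paddle.to_tensor([-1.0, -0.5, 0.0, 0.5, 0.999])
quantized = paddle.cast((x + 1.0) / 2.0 * n_bands, "int64")  # values in [0, n_bands)
recovered = dequantize(quantized, n_bands)
print(paddle.abs(recovered - x).max())  # <= 1 / n_bands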
def __init__(self, n_fft, hop_length, win_length, window="hanning"): super(STFT, self).__init__() self.hop_length = hop_length self.n_bin = 1 + n_fft // 2 self.n_fft = n_fft # calculate window window = signal.get_window(window, win_length) if n_fft != win_length: pad = (n_fft - win_length) // 2 window = np.pad(window, ((pad, pad), ), 'constant') # calculate weights r = np.arange(0, n_fft) M = np.expand_dims(r, -1) * np.expand_dims(r, 0) w_real = np.reshape( window * np.cos(2 * np.pi * M / n_fft)[:self.n_bin], (self.n_bin, 1, 1, self.n_fft)) w_imag = np.reshape( window * np.sin(-2 * np.pi * M / n_fft)[:self.n_bin], (self.n_bin, 1, 1, self.n_fft)) w = np.concatenate([w_real, w_imag], axis=0) self.weight = paddle.cast(paddle.to_tensor(w), paddle.get_default_dtype())
def forward(self, input_ids, attention_mask=None):
    """
    Returns:
        Tensor: The last hidden states at the last layer of the encoder.
        Its data type should be `float` and has a shape of
        `(batch_size, seq_lens, hidden_size)`. ``seq_lens`` corresponds to
        the length of the input sequence.
    """
    if input_ids is None:
        raise ValueError("Input_ids cannot be None.")

    inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
    inputs_embed_pos = self.encoder_embed_positions(input_ids.shape)
    hidden_states = inputs_embeds + inputs_embed_pos
    encoder_input = self.encoder_dropout(hidden_states)

    if attention_mask is None:
        attention_mask = paddle.cast(
            input_ids == self.pad_token_id,
            dtype=paddle.get_default_dtype()).unsqueeze([1, 2]) * -1e4
        attention_mask.stop_gradient = True

    encoder_output = self.encoder(encoder_input, src_mask=attention_mask)
    # Different from BlenderbotSmall, the Blenderbot encoder applies a final
    # layer norm on the encoder output.
    encoder_output = self.encoder_layernorm(encoder_output)
    return encoder_output
def degree_norm(graph, mode="indegree"): """Calculate the degree normalization of a graph Args: graph: the graph object from (:code:`Graph`) mode: which degree to be normalized ("indegree" or "outdegree") return: A tensor with shape (num_nodes, 1). """ assert mode in [ 'indegree', 'outdegree' ], "The degree_norm mode should be in ['indegree', 'outdegree']. But recieve mode=%s" % mode if mode == "indegree": degree = graph.indegree() elif mode == "outdegree": degree = graph.outdegree() norm = paddle.cast(degree, dtype=paddle.get_default_dtype()) norm = paddle.clip(norm, min=1.0) norm = paddle.pow(norm, -0.5) norm = paddle.reshape(norm, [-1, 1]) return norm
def forward(self, src, src_length):
    encoder_output, encoder_final_state = self.encoder(src, src_length)
    encoder_final_state = [
        (encoder_final_state[0][i], encoder_final_state[1][i])
        for i in range(self.num_layers)
    ]

    # Build the decoder initial states
    decoder_initial_states = [
        encoder_final_state,
        self.decoder.lstm_attention.cell.get_initial_states(
            batch_ref=encoder_output, shape=[self.hidden_size])
    ]
    # Build the attention mask to avoid paying attention to paddings
    src_mask = (src != self.eos_id).astype(paddle.get_default_dtype())
    encoder_padding_mask = (src_mask - 1.0) * self.INF
    encoder_padding_mask = paddle.unsqueeze(encoder_padding_mask, [1])

    # Tile the batch dimension with beam_size
    encoder_output = nn.BeamSearchDecoder.tile_beam_merge_with_batch(
        encoder_output, self.beam_size)
    encoder_padding_mask = nn.BeamSearchDecoder.tile_beam_merge_with_batch(
        encoder_padding_mask, self.beam_size)

    # Dynamic decoding with beam search
    seq_output, _ = nn.dynamic_decode(
        decoder=self.beam_search_decoder,
        inits=decoder_initial_states,
        max_step_num=self.max_out_len,
        encoder_output=encoder_output,
        encoder_padding_mask=encoder_padding_mask)
    return seq_output
def __init__(self,
             data_file=None,
             mode='train',
             transform=None,
             download=True,
             backend=None):
    assert mode.lower() in ['train', 'valid', 'test'], \
        "mode should be 'train', 'valid' or 'test', but got {}".format(mode)

    if backend is None:
        backend = paddle.vision.get_image_backend()
    if backend not in ['pil', 'cv2']:
        raise ValueError(
            "Expected backend are one of ['pil', 'cv2'], but got {}".format(
                backend))
    self.backend = backend

    self.flag = MODE_FLAG_MAP[mode.lower()]

    self.data_file = data_file
    if self.data_file is None:
        assert download, "data_file is not set and downloading automatically is disabled"
        self.data_file = _check_exists_and_download(
            data_file, VOC_URL, VOC_MD5, CACHE_DIR, download)

    self.transform = transform

    # read dataset into memory
    self._load_anno()

    self.dtype = paddle.get_default_dtype()
def forward(self, src, src_length, trg, trg_length):
    # Encoder
    _, enc_final_state = self.encoder(src, src_length)

    # Build distribution
    z_mean, z_log_var = self.build_distribution(enc_final_state)

    # Decoder
    latent_z = self.sampling(z_mean, z_log_var)
    dec_first_hidden_cell = self.fc(latent_z)
    dec_first_hidden, dec_first_cell = paddle.split(
        dec_first_hidden_cell, 2, axis=-1)
    if self.num_layers > 1:
        dec_first_hidden = paddle.split(dec_first_hidden, self.num_layers)
        dec_first_cell = paddle.split(dec_first_cell, self.num_layers)
    else:
        dec_first_hidden = [dec_first_hidden]
        dec_first_cell = [dec_first_cell]
    dec_initial_states = [
        [h, c] for h, c in zip(dec_first_hidden, dec_first_cell)
    ]
    dec_output = self.decoder(trg, dec_initial_states, latent_z)

    kl_loss = self.calc_kl_dvg(z_mean, z_log_var)
    trg_mask = (self.PAD_ID != trg).astype(paddle.get_default_dtype())
    return kl_loss, dec_output, trg_mask
def test_with_input_lengths(self):
    mp = self.mp.clone()
    sp = self.sp
    rnn1 = self.rnn1
    rnn2 = self.rnn2
    exe = self.executor
    scope = self.scope

    x = np.random.randn(12, 4, 16)
    if not self.time_major:
        x = np.transpose(x, [1, 0, 2])
    sequence_length = np.array([12, 10, 9, 8], dtype=np.int64)

    y1, (h1, c1) = rnn1(x, sequence_length=sequence_length)

    with paddle.fluid.unique_name.guard():
        with paddle.static.program_guard(mp, sp):
            x_data = paddle.data(
                "input", [-1, -1, 16],
                dtype=paddle.framework.get_default_dtype())
            seq_len = paddle.data("seq_len", [-1], dtype="int64")
            mask = sequence_mask(seq_len, dtype=paddle.get_default_dtype())
            if self.time_major:
                mask = paddle.transpose(mask, [1, 0])
            y, (h, c) = rnn2(x_data, sequence_length=seq_len)
            y = paddle.multiply(y, mask, axis=0)

    feed_dict = {x_data.name: x, seq_len.name: sequence_length}

    with paddle.static.scope_guard(scope):
        y2, h2, c2 = exe.run(mp, feed=feed_dict, fetch_list=[y, h, c])

    np.testing.assert_allclose(y1, y2, atol=1e-8, rtol=1e-5)
    np.testing.assert_allclose(h1, h2, atol=1e-8, rtol=1e-5)
    np.testing.assert_allclose(c1, c2, atol=1e-8, rtol=1e-5)
def get_random_case(self):
    label = np.random.randint(
        self.cls_num, size=self.label_shape).astype("int64")
    logits = np.random.uniform(
        0.1, 1.0, self.shape).astype(paddle.get_default_dtype())
    pred = np.apply_along_axis(stable_softmax, -1, logits)
    seq_mask = np.random.randint(2, size=self.label_shape).astype("int64")
    return label, logits, pred, seq_mask
def seq2feats(self, log_seqs, time_matrices):
    seqs = self.item_emb(log_seqs)
    seqs *= self.item_emb._embedding_dim**0.5
    seqs = self.item_emb_dropout(seqs)

    positions = paddle.arange(log_seqs.shape[1]).unsqueeze(0).expand(
        [log_seqs.shape[0], -1])
    abs_pos_K = self.abs_pos_K_emb(positions)
    abs_pos_V = self.abs_pos_V_emb(positions)
    abs_pos_K = self.abs_pos_K_emb_dropout(abs_pos_K)
    abs_pos_V = self.abs_pos_V_emb_dropout(abs_pos_V)

    time_matrix_K = self.time_matrix_K_emb(time_matrices)
    time_matrix_V = self.time_matrix_V_emb(time_matrices)
    time_matrix_K = self.time_matrix_K_dropout(time_matrix_K)
    time_matrix_V = self.time_matrix_V_dropout(time_matrix_V)

    # Mask 0th items (placeholder for dry-run) in log_seqs.
    # It would be easier if the 0th item could be an exception for training.
    timeline_mask = log_seqs == 0
    seqs *= (log_seqs != 0).astype(paddle.get_default_dtype()).unsqueeze(
        -1)  # broadcast in last dim

    tl = seqs.shape[1]  # time dim len, used to enforce causality
    attention_mask = (
        paddle.tril(paddle.ones([tl, tl])) == 0).astype(paddle.bool)

    for i in range(len(self.attention_layers)):
        # Self-attention, Q=layernorm(seqs), K=V=seqs
        # seqs = paddle.transpose(seqs, 0, 1)  # (N, T, C) -> (T, N, C)
        Q = self.attention_layernorms[i](seqs)
        mha_outputs = self.attention_layers[i](
            Q, seqs, timeline_mask, attention_mask, time_matrix_K,
            time_matrix_V, abs_pos_K, abs_pos_V)
        seqs = Q + mha_outputs
        # seqs = paddle.transpose(seqs, 0, 1)  # (T, N, C) -> (N, T, C)

        # Point-wise feed-forward, actually two Conv1D layers for channel-wise fusion
        seqs = self.forward_layernorms[i](seqs)
        seqs = self.forward_layers[i](seqs)
        seqs *= (timeline_mask.astype(int) == 0).astype(
            paddle.get_default_dtype()).unsqueeze(-1)

    log_feats = self.last_layernorm(seqs)
    return log_feats
def to_tensor(pic):
    """Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.

    See :class:`~paddlevision.transforms.ToTensor` for more details.

    Args:
        pic (PIL Image or numpy.ndarray): Image to be converted to tensor.

    Returns:
        Tensor: Converted image.
    """
    if not (F_pil._is_pil_image(pic) or _is_numpy(pic)):
        raise TypeError('pic should be PIL Image or ndarray. Got {}'.format(
            type(pic)))

    if _is_numpy(pic) and not _is_numpy_image(pic):
        raise ValueError(
            'pic should be 2/3 dimensional. Got {} dimensions.'.format(
                pic.ndim))

    default_float_dtype = paddle.get_default_dtype()

    if isinstance(pic, np.ndarray):
        # handle numpy array
        if pic.ndim == 2:
            pic = pic[:, :, None]

        img = paddle.to_tensor(pic.transpose((2, 0, 1)))
        # backward compatibility
        if not img.dtype == default_float_dtype:
            img = img.astype(dtype=default_float_dtype)
            return img.divide(paddle.full_like(img, 255))
        else:
            return img

    if accimage is not None and isinstance(pic, accimage.Image):
        nppic = np.zeros(
            [pic.channels, pic.height, pic.width], dtype=np.float32)
        pic.copyto(nppic)
        return paddle.to_tensor(nppic).astype(dtype=default_float_dtype)

    # handle PIL Image
    mode_to_nptype = {'I': np.int32, 'I;16': np.int16, 'F': np.float32}
    img = paddle.to_tensor(
        np.array(pic, mode_to_nptype.get(pic.mode, np.uint8), copy=True))

    if pic.mode == '1':
        img = 255 * img

    img = img.reshape([pic.size[1], pic.size[0], len(pic.getbands())])
    if not img.dtype == default_float_dtype:
        img = img.astype(dtype=default_float_dtype)
        # put it from HWC to CHW format
        img = img.transpose((2, 0, 1))
        return img.divide(paddle.full_like(img, 255))
    else:
        # put it from HWC to CHW format
        img = img.transpose((2, 0, 1))
        return img
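# A hypothetical usage sketch (not from the original source): an HWC uint8 numpy image
# becomes a CHW tensor in the default float dtype, scaled to [0, 1].
import numpy as np

img = np.random.randint(0, 256, size=(32, 32, 3), dtype=np.uint8)
tensor = to_tensor(img)
print(tensor.shape, tensor.dtype)  # [3, 32, 32] and the default float dtype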
def __init__(self, emb_dim, max_length):
    super(PositionalEmbedding, self).__init__()
    self.emb_dim = emb_dim
    self.pos_encoder = nn.Embedding(
        num_embeddings=max_length, embedding_dim=self.emb_dim)
    self.pos_encoder.weight.set_value(
        position_encoding_init(
            max_length, self.emb_dim, dtype=paddle.get_default_dtype()))
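# A minimal sketch (assumption, not from the original source) of the sinusoidal table
# that position_encoding_init is expected to return: even columns hold sin, odd
# columns hold cos, following the standard Transformer positional encoding. Assumes
# an even emb_dim.
import numpy as np
import paddle

def sinusoid_position_encoding(max_length, emb_dim, dtype=None):
    dtype = dtype or paddle.get_default_dtype()
    position = np.arange(max_length)[:, None]
    div = np.power(10000.0, np.arange(0, emb_dim, 2) / emb_dim)
    table = np.zeros((max_length, emb_dim))
    table[:, 0::2] = np.sin(position / div)
    table[:, 1::2] = np.cos(position / div)
    return table.astype(dtype)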
def setUp(self):
    self.original_dtype = paddle.get_default_dtype()
    # MIOPEN does not support data type of double
    if core.is_compiled_with_rocm():
        paddle.set_default_dtype("float32")
    else:
        paddle.set_default_dtype("float64")
    self.places = [fluid.CPUPlace()]
    if core.is_compiled_with_cuda() and core.op_support_gpu("batch_norm"):
        self.places.append(fluid.CUDAPlace(0))
def __init__(self, image_root, cls_label_path, transform_ops=None):
    self._img_root = image_root
    self._cls_path = cls_label_path
    if transform_ops:
        self._transform_ops = create_operators(transform_ops)
    self._dtype = paddle.get_default_dtype()
    self._load_anno()
def get_multi_labels_random_case(self):
    label = np.random.randint(
        self.cls_num, size=self.label_shape).astype("int64")
    pred = np.random.uniform(
        0.1, 1.0, self.shape).astype(paddle.get_default_dtype())
    np_label = label.reshape(-1)
    np_pred = pred.reshape(-1, self.cls_num).argmax(axis=1)
    average_type = ['micro', 'macro', 'weighted', None]
    pos_label = np.random.randint(0, self.cls_num)
    average = average_type[np.random.randint(0, 3)]
    return label, pred, np_label, np_pred, average, pos_label
def greedy_search(self, input_ids, logits_processors, max_length,
                  pad_token_id, eos_token_id, **model_kwargs):
    batch_size, cur_len = input_ids.shape
    origin_len = cur_len
    unfinished_flag = paddle.full([batch_size, 1], True, dtype='bool')
    scores = paddle.full(
        [batch_size, 1], 0.0, dtype=paddle.get_default_dtype())

    while cur_len < max_length:
        # prepare model inputs & get model output
        model_inputs = self.prepare_inputs_for_generation(input_ids,
                                                          **model_kwargs)
        outputs = self(**model_inputs)
        logits = outputs[0] if isinstance(outputs, tuple) else outputs
        # [batch_size, vocab_size]
        logits = logits[:, -1, :]

        # pre-process distribution
        logits = self.adjust_logits_during_generation(logits)
        logits = logits_processors(input_ids, logits)

        # greedy
        probs = F.softmax(logits)
        probs = paddle.log(probs)
        next_tokens = paddle.argmax(probs, axis=-1).unsqueeze(-1)
        next_scores = paddle.index_sample(probs, next_tokens)

        if eos_token_id is not None:
            next_tokens = paddle.where(
                unfinished_flag, next_tokens,
                paddle.full_like(next_tokens, pad_token_id))

        scores = self.update_scores_for_generation(
            scores, next_scores, cur_len - origin_len, unfinished_flag)

        cur_len += 1
        input_ids = paddle.concat([input_ids, next_tokens], axis=1)

        if eos_token_id is not None:
            unfinished_flag = paddle.logical_and(
                unfinished_flag, next_tokens != eos_token_id)

        # Stop when there is a </s> in all sentences
        if not paddle.any(unfinished_flag):
            break

        model_kwargs = self.update_model_kwargs_for_generation(
            outputs, model_kwargs)

    return input_ids[:, origin_len:], scores
def _create_global_learning_rate(self):
    if isinstance(self._learning_rate, _LRScheduler):
        lr_var = self._global_learning_rate()
        # only create global lr_var once
        if not isinstance(lr_var, framework.Variable):
            lr_name = unique_name.generate('learning_rate')
            self._learning_rate._var_name = lr_name
            lr_var = self.helper.create_global_variable(
                name=lr_name,
                shape=[1],
                persistable=True,
                stop_gradient=True,
                dtype=paddle.get_default_dtype()
                if self._dtype is None else self._dtype)
            main_prog = framework.default_main_program()
            main_prog.lr_sheduler = self._learning_rate
            main_prog.lr_var = lr_var
            self._learning_rate_map[
                framework.default_main_program()] = lr_var

        lr_value = float(self._learning_rate())
        self.helper.set_variable_initializer(
            lr_var, initializer=Constant(value=lr_value))
    elif isinstance(self._learning_rate, float):
        # only create global lr_var once
        lr = self._global_learning_rate()
        if isinstance(lr, framework.Variable):
            return
        else:
            self._learning_rate_map[
                framework.default_main_program()] = layers.create_global_var(
                    name=unique_name.generate("learning_rate"),
                    shape=[1],
                    value=float(self._learning_rate),
                    dtype=paddle.get_default_dtype()
                    if self._dtype is None else self._dtype,
                    persistable=True)
def sample_from_softmax(self, logits, use_softmax_sample=True):
    if use_softmax_sample:
        # uniform_noise = paddle.uniform(logits.shape, dtype="float32", min=0, max=1)
        uniform_noise = paddle.rand(
            logits.shape, dtype=paddle.get_default_dtype())
        gumbel_noise = -paddle.log(-paddle.log(uniform_noise + 1e-9) + 1e-9)
    else:
        gumbel_noise = paddle.zeros_like(logits)
    # softmax_sample equal to sampled_tokids.unsqueeze(-1)
    softmax_sample = paddle.argmax(
        F.softmax(logits + gumbel_noise), axis=-1)

    # one hot
    return F.one_hot(softmax_sample, logits.shape[-1])
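# A minimal sketch (assumption, not from the original source) of the Gumbel-max trick
# used above: adding Gumbel noise to the logits and taking the argmax draws a sample
# from softmax(logits), which the method then returns in one-hot form.
import paddle
import paddle.nn.functional as F

logits = paddle.to_tensor([[1.0, 2.0, 0.5]])
uniform_noise = paddle.rand(logits.shape, dtype=paddle.get_default_dtype())
gumbel_noise = -paddle.log(-paddle.log(uniform_noise + 1e-9) + 1e-9)
sample = paddle.argmax(logits + gumbel_noise, axis=-1)  # a categorical sample
one_hot = F.one_hot(sample, logits.shape[-1])           # same one-hot form as above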