class Translator:
    def __init__(self):
        print('[Loading dataset]')
        self.dataset = Dataset()
        print('[Loading network]')
        latest = tf.train.latest_checkpoint('./check')
        self.transformer = Transformer()
        self.transformer.load_weights(latest)

    def evaluate(self, inp_sentence):
        # fixed: the tokenizers live on the dataset, not on the transformer
        start_token = [self.dataset.tokenizer_pt.vocab_size]
        end_token = [self.dataset.tokenizer_pt.vocab_size + 1]

        # inp_sentence is Portuguese, hence adding the start and end token
        inp_sentence = (start_token +
                        self.dataset.tokenizer_pt.encode(inp_sentence) +
                        end_token)
        encoder_input = tf.expand_dims(inp_sentence, 0)

        # As the target is English, the first token fed to the transformer
        # should be the English start token.
        decoder_input = [self.dataset.tokenizer_en.vocab_size]
        output = tf.expand_dims(decoder_input, 0)

        for i in range(MAX_LENGTH):
            enc_padding_mask, combined_mask, dec_padding_mask = create_mask(
                encoder_input, output)

            # predictions.shape == (batch_size, seq_len, vocab_size)
            predictions, attention_weights = self.transformer(
                encoder_input, output, False,
                enc_padding_mask, combined_mask, dec_padding_mask)

            # select the last word from the seq_len dimension
            predictions = predictions[:, -1:, :]  # (batch_size, 1, vocab_size)
            predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

            # return the result if the predicted_id is equal to the end token
            if predicted_id == self.dataset.tokenizer_en.vocab_size + 1:
                return tf.squeeze(output, axis=0), attention_weights

            # concatenate the predicted_id to the output, which is fed back to
            # the decoder as its input
            output = tf.concat([output, predicted_id], axis=-1)

        return tf.squeeze(output, axis=0), attention_weights

    def translate(self, sentence, plot=''):
        result, attention_weights = self.evaluate(sentence)

        predicted_sentence = self.dataset.tokenizer_en.decode(
            [i for i in result if i < self.dataset.tokenizer_en.vocab_size])

        print('Input: {}'.format(sentence))
        print('Predicted translation: {}'.format(predicted_sentence))

        if plot:
            pass  # attention plotting not implemented
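# A minimal usage sketch (hypothetical: assumes this repo's Dataset and
# Transformer classes are importable and a trained checkpoint exists under
# './check').
if __name__ == '__main__':
    translator = Translator()
    # Portuguese -> English, as in the TensorFlow transformer tutorial
    translator.translate('este é um problema que temos que resolver.')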
def synthesis(text_list, args):
    """
    Attention!
    During training we can feed the whole right-shifted target sentence at
    once; during testing, however, the model has to output words step by step:

        a       b   c   [eos]
        ^       ^   ^   ^
        |       |   |   |
        [start] a   b   c
    """
    # restore model
    m = Transformer()
    m.load_state_dict(load_checkpoint(args.restore, "transformer"))

    for text in text_list:
        print("[INPUT] " + text)
        source_seq = []
        for w in text.split(" "):
            source_seq.append(_vocab_to_id[w])
        source_seq.append(_vocab_to_id["-"])  # add eos
        source_tensor = t.LongTensor(
            np.asarray(source_seq)).unsqueeze(0).cuda()

        decoder_input = t.zeros([1, 1]).long().cuda()
        pos_source = t.arange(
            1, source_tensor.size(1) + 1).long().unsqueeze(0).cuda()

        m = m.cuda()
        m.eval()

        pbar = range(args.max_len)
        with t.no_grad():
            for i in pbar:
                pos_target = t.arange(
                    1, decoder_input.size(1) + 1).unsqueeze(0).cuda()
                pred, attn, attn_enc, attn_dec = m.forward(
                    source_tensor, decoder_input, pos_source, pos_target)

                # get the latest word from the decoder output
                output_word_idx = t.topk(F.softmax(pred[:, -1, :], dim=1),
                                         k=1)[1]
                if output_word_idx.squeeze().cpu() == 1:  # output eos
                    break

                # append it to the decoder input and predict the next word
                decoder_input = t.cat([decoder_input, output_word_idx], dim=1)

        output_sentence = []
        for w in decoder_input.squeeze().cpu().numpy()[1:]:
            output_sentence.append(_id_to_vocab[w])
        print("[OUTPUT] " + " ".join(output_sentence))
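# Invocation sketch (hypothetical: flag names mirror the attributes used
# above; `--restore` is assumed to be a checkpoint step, matching the
# 'checkpoint_transformer_%d.pth.tar' naming used at save time, and
# _vocab_to_id/_id_to_vocab must already be populated).
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--restore", type=int, default=400000,
                        help="global step of the checkpoint to load")
    parser.add_argument("--max_len", type=int, default=50,
                        help="maximum number of decoding steps")
    args = parser.parse_args()
    synthesis(["how are you"], args)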
async def download_content(message: types.Message, state: FSMContext):
    await message.photo[-1].download('test.jpg')

    import torch
    import numpy as np
    from PIL import Image
    import torchvision.transforms as transforms
    from torch.autograd import Variable
    from network import Transformer

    valid_ext = ['.jpg', '.png']

    model = Transformer()
    # weights_path = wget.download('http://vllab1.ucmerced.edu/~yli62/CartoonGAN/pytorch_pth/Hayao_net_G_float.pth')
    weights_path = 'Hayao_net_G_float.pth'
    model.load_state_dict(torch.load(weights_path))
    # model = torch.load('model.pth')
    model.eval()
    model.float()

    # load image
    input_image = Image.open('test.jpg').convert("RGB")

    # resize image, keep aspect ratio
    # note: PIL's size is (width, height), so the h/w names here are swapped,
    # but they are used consistently in the resize() call below
    h = input_image.size[0]
    w = input_image.size[1]
    ratio = h * 1.0 / w
    if ratio > 1:
        h = 310
        w = int(h * 1.0 / ratio)
    else:
        w = 310
        h = int(w * ratio)
    input_image = input_image.resize((h, w), Image.BICUBIC)
    input_image = np.asarray(input_image)

    # RGB -> BGR
    input_image = input_image[:, :, [2, 1, 0]]
    input_image = transforms.ToTensor()(input_image).unsqueeze(0)
    # scale to [-1, 1]
    input_image = -1 + 2 * input_image

    with torch.no_grad():
        input_image = Variable(input_image).float()
        output_image = model(input_image)
        output_image = output_image[0]

    # BGR -> RGB, then back to [0, 1]
    output_image = output_image[[2, 1, 0], :, :]
    output_image = output_image.data.cpu().float() * 0.5 + 0.5
    output_image = output_image.squeeze(0)

    # function for rendering the image
    unloader = transforms.ToPILImage()
    output_image = unloader(output_image)
    output_image.save('gan.jpg')

    media = types.MediaGroup()
    media.attach_photo(types.InputFile('gan.jpg'))
    await message.reply_media_group(media=media)
    await message.reply("Ready! To continue choose /gan or /style_transfer")
    await state.finish()
def look_transformer(device: str = "cpu"):
    sample_transformer = Transformer(
        num_layers=2,
        features=512,
        num_heads=8,
        ff_features=2048,  # fixed typo: was 'fffetures'
        input_vocab_size=8500,
        target_vocab_size=8000,
        pe_input=10000,
        pe_target=6000,
    ).to(device)

    # random token ids standing in for a batch of 64 sequences
    temp_input = torch.rand(64, 38).type(torch.LongTensor).to(device)
    temp_target = torch.rand(64, 36).type(torch.LongTensor).to(device)

    fn_out, _ = sample_transformer(
        temp_input,
        temp_target,
        enc_padding_mask=None,
        look_ahead_mask=None,
        dec_padding_mask=None,
    )
    print(
        "Transformer Shape", fn_out.shape
    )  # (batch_size, tar_seq_len, target_vocab_size)
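# Quick smoke-test sketch: run the shape check on GPU when one is available.
if __name__ == "__main__":
    look_transformer("cuda" if torch.cuda.is_available() else "cpu")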
def load_model():
    model = Transformer()
    model.load_state_dict(
        torch.load(os.path.join(opt.model_path,
                                opt.style + '_net_G_float.pth')))
    model.eval()
    # if opt.gpu > -1:
    #     print('GPU mode')
    #     model.cuda()
    # else:
    #     print('CPU mode')
    model.float()
    return model
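# Usage sketch (hypothetical: `opt` is an argparse namespace exposing
# `model_path` and `style`, e.g. style='Hayao'; `input_tensor` stands in for
# a (1, 3, H, W) image tensor scaled to [-1, 1]).
model = load_model()
with torch.no_grad():
    output = model(input_tensor)  # stylized image, same layout as the input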
def evaluate(inp_sentence):
    """...

    Args:
        inp_sentence : ...

    Returns:
        tf.squeeze(output, axis=0) : ...
        attention_weights : ...
    """
    start_token = [tokenizer_in.size]
    end_token = [tokenizer_in.size + 1]

    # inp_sentence is Portuguese, hence adding the start and end token
    inp_sentence = start_token + tokenizer_in.encode(inp_sentence) + end_token
    encoder_input = tf.expand_dims(inp_sentence, 0)

    # As the target is English, the first token fed to the transformer should
    # be the English start token.
    decoder_input = [tokenizer_out.size]
    output = tf.expand_dims(decoder_input, 0)

    for _ in range(MAX_LENGTH):
        enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            encoder_input, output)

        # predictions.shape == (batch_size, seq_len, vocab_size)
        # fixed: this must call a trained model *instance*, not the
        # Transformer class itself
        predictions, attention_weights = transformer(encoder_input,
                                                     output,
                                                     False,
                                                     enc_padding_mask,
                                                     combined_mask,
                                                     dec_padding_mask)

        # select the last word from the seq_len dimension
        predictions = predictions[:, -1:, :]  # (batch_size, 1, vocab_size)
        predicted_id = tf.cast(tf.argmax(predictions, axis=-1), tf.int32)

        # return the result if the predicted_id is equal to the end token
        if predicted_id == tokenizer_out.size + 1:
            return tf.squeeze(output, axis=0), attention_weights

        # concatenate the predicted_id to the output, which is given to the
        # decoder as its input
        output = tf.concat([output, predicted_id], axis=-1)

    return tf.squeeze(output, axis=0), attention_weights
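# Sketch of the decode step that typically follows evaluate() (assumption:
# tokenizer_out exposes a decode() that mirrors the encode() used above).
def translate(sentence):
    result, _ = evaluate(sentence)
    predicted = tokenizer_out.decode(
        [i for i in result if i < tokenizer_out.size])
    print('Input: {}'.format(sentence))
    print('Predicted translation: {}'.format(predicted))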
def __init__(self):
    self.num_layers = 4
    self.d_model = 128
    self.dff = 512
    self.num_heads = 8

    self.dataset = Dataset().build()
    # +2 for the start and end tokens
    self.input_vocab_size = self.dataset.tokenizer_pt.vocab_size + 2
    self.target_vocab_size = self.dataset.tokenizer_en.vocab_size + 2
    self.max_seq_len = 40
    self.dropout_rate = 0.1

    self.learning_rate = CustomSchedule(self.d_model)
    self.optimizer = tf.keras.optimizers.Adam(self.learning_rate,
                                              beta_1=0.9,
                                              beta_2=0.98,
                                              epsilon=1e-9)
    self.loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')
    self.train_loss = tf.keras.metrics.Mean(name='train_loss')
    self.train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    self.transformer = Transformer(self.num_layers, self.d_model,
                                   self.num_heads, self.dff,
                                   self.input_vocab_size,
                                   self.target_vocab_size,
                                   self.max_seq_len, self.dropout_rate)

    # checkpoint manager
    checkpoint = tf.train.Checkpoint(transformer=self.transformer,
                                     optimizer=self.optimizer)
    checkpoint_path = './check'
    self.checkpoint_manager = tf.train.CheckpointManager(checkpoint,
                                                         checkpoint_path,
                                                         max_to_keep=3)
    if self.checkpoint_manager.latest_checkpoint:
        checkpoint.restore(self.checkpoint_manager.latest_checkpoint)
        print('latest checkpoint restored')
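# CustomSchedule is not defined in this snippet; a minimal sketch of the
# warmup schedule from "Attention Is All You Need" that such a class usually
# implements (assumption: this repo's version matches the TensorFlow
# transformer tutorial's).
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):
    def __init__(self, d_model, warmup_steps=4000):
        super().__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        step = tf.cast(step, tf.float32)
        # lr = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)
        arg1 = tf.math.rsqrt(step)
        arg2 = step * (self.warmup_steps ** -1.5)
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(arg1, arg2)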
from network import Node, Line, Transformer, Network
from network_utils import check_branches_and_nodes, print_inductance_array
from newton_raphson import NR
from fast_decoupled import FDPF

# Step 1: define network parameters (lines and transformers).
# Here we use our example parameter set.
from examples_example_input import parameter_set2 as parameter_set

nodes, lines, transformers, extra_branches = parameter_set
nodes = [Node.from_str(node.strip()) for node in nodes.strip().split("\n")]
transformers = [
    Transformer.from_str(transformer.strip())
    for transformer in transformers.strip().split("\n")
]
# drop None entries in case there are some accidental blank lines
transformers = [
    transformer for transformer in transformers if transformer is not None
]
lines = [Line.from_str(line.strip()) for line in lines.strip().split("\n")]
# drop None entries in case there are some accidental blank lines
lines = [line for line in lines if line is not None]
branches = transformers + lines  # both are lists, so they simply concatenate

# Step 2: check the parameters and form a network
check_branches_and_nodes(branches, nodes)
network = Network(nodes, branches, extra_branches)
print("Inductance Array after node re-index:\n{}\n".format(network.Y))
def main():
    global_step = 0

    # multi GPUs
    m = nn.DataParallel(Transformer().cuda())

    # enable batch normalization and dropout; with m.eval() their
    # training-time behavior would be disabled
    m.train()

    optimizer = t.optim.Adam(m.parameters(), lr=hp.lr)

    # use tensorboard to record the training information
    writer = SummaryWriter()

    for epoch in tqdm(range(hp.epochs)):
        """
        # if using a dataloader to collect the training samples:
        dataloader = DataLoader(dataset, batch_size=hp.batch_size, shuffle=True)
        for i, data in enumerate(dataloader):
            pbar.set_description("Processing at epoch %d" % epoch)
        """
        # a simple hard-coded example is used here
        for itera in range(10):
            global_step += 1
            if global_step < 400000:
                adjust_learning_rate(optimizer, global_step)

            # how are you eos ->
            # what is your name eos ->
            source_seq = t.Tensor([[2, 3, 4, 1, 0],
                                   [5, 6, 7, 8, 1]]).long()
            # -> I am fine , thanks eos
            # -> Bob eos
            target_seq = t.Tensor([[9, 10, 11, 12, 13, 1],
                                   [14, 1, 0, 0, 0, 0]]).long()
            # right-shifted target sequence
            target_seq_input = t.Tensor([[0, 9, 10, 11, 12, 13],
                                         [0, 14, 1, 0, 0, 0]]).long()
            source_pos = t.Tensor([[1, 2, 3, 4, 0],
                                   [1, 2, 3, 4, 5]]).long()
            target_pos = t.Tensor([[1, 2, 3, 4, 5, 6],
                                   [1, 2, 0, 0, 0, 0]]).long()

            source_seq = source_seq.cuda()
            target_seq = target_seq.cuda()
            target_seq_input = target_seq_input.cuda()
            source_pos = source_pos.cuda()
            target_pos = target_pos.cuda()

            pred_logit, attn_probs, attns_enc, attns_dec = m.forward(
                source_seq, target_seq_input, source_pos, target_pos)

            # flatten the predictions to (B * T, vocab_size) and the targets
            # to (B * T,) for the cross-entropy loss
            loss = nn.CrossEntropyLoss()(pred_logit.reshape(
                -1, pred_logit.size()[2]), target_seq.reshape(-1))

            writer.add_scalars('training_loss', {
                'loss': loss,
            }, global_step)

            """
            # You can draw the attention maps here
            if global_step % hp.image_step == 1:
                for i, prob in enumerate(attn_probs):
                    num_h = prob.size(0)
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_%d_0' % global_step, x, i * 4 + j)
                for i, prob in enumerate(attns_enc):
                    num_h = prob.size(0)
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_enc_%d_0' % global_step, x, i * 4 + j)
                for i, prob in enumerate(attns_dec):
                    num_h = prob.size(0)
                    for j in range(4):
                        x = vutils.make_grid(prob[j * 16] * 255)
                        writer.add_image('Attention_dec_%d_0' % global_step, x, i * 4 + j)
            """

            optimizer.zero_grad()
            # calculate gradients
            loss.backward()
            nn.utils.clip_grad_norm_(m.parameters(), 1.)
            # update weights
            optimizer.step()

            if global_step % hp.save_step == 0:
                if not os.path.exists(hp.checkpoint_path):
                    os.mkdir(hp.checkpoint_path)
                t.save(
                    {
                        'model': m.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(
                        hp.checkpoint_path,
                        'checkpoint_transformer_%d.pth.tar' % global_step))
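# adjust_learning_rate is called above but not defined in this snippet; a
# plausible sketch of the Noam warmup rule from "Attention Is All You Need"
# (assumptions: hp.lr as base rate and a 4000-step warmup, as in common
# implementations).
def adjust_learning_rate(optimizer, step, warmup_steps=4000):
    lr = hp.lr * warmup_steps ** 0.5 * min(step * warmup_steps ** -1.5,
                                           step ** -0.5)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr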
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True,
                                                            reduction='none')
optimizer = tf.keras.optimizers.Adam(learning_rate,
                                     beta_1=0.9,
                                     beta_2=0.98,
                                     epsilon=1e-9)

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.Mean(name='train_accuracy')

transformer = Transformer(num_layers, d_model, num_heads, dff,
                          input_vocab_size, target_vocab_size,
                          pe_input=input_vocab_size,
                          pe_target=target_vocab_size,
                          rate=dropout_rate)

# MAIN EXECUTION.
EPOCHS = 5

for epoch in range(EPOCHS):
    start = time.time()

    train_loss.reset_states()
    train_accuracy.reset_states()

    # inp -> portuguese, tar -> english
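# The per-batch step itself is not shown above; a sketch of the usual
# teacher-forced train_step from the TensorFlow transformer tutorial
# (assumptions: create_masks, loss_function and accuracy_function are defined
# as in that tutorial, and each batch yields an (inp, tar) pair).
@tf.function
def train_step(inp, tar):
    tar_inp = tar[:, :-1]   # decoder input (right-shifted)
    tar_real = tar[:, 1:]   # labels
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
        inp, tar_inp)

    with tf.GradientTape() as tape:
        predictions, _ = transformer(inp, tar_inp, True, enc_padding_mask,
                                     combined_mask, dec_padding_mask)
        loss = loss_function(tar_real, predictions)

    gradients = tape.gradient(loss, transformer.trainable_variables)
    optimizer.apply_gradients(zip(gradients, transformer.trainable_variables))

    train_loss(loss)
    train_accuracy(accuracy_function(tar_real, predictions))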
import numpy as np
import torch  # added: required by torch.load/torch.save below
import wget
from PIL import Image
import torchvision.transforms as transforms
from torch.autograd import Variable

from network import Transformer

valid_ext = ['.jpg', '.png']

# load pretrained model
model = Transformer()
# weights_path = wget.download('http://vllab1.ucmerced.edu/~yli62/CartoonGAN/pytorch_pth/Hayao_net_G_float.pth')
weights_path = 'Hayao_net_G_float.pth'
# weights_path = wget.download('http://vllab1.ucmerced.edu/~yli62/CartoonGAN/torch_t7/Hayao_net_G_float.t7')
model.load_state_dict(torch.load(weights_path))
torch.save(model, 'model.pth')
model.eval()
model.float()

# load image
input_image = Image.open('test.jpg').convert("RGB")

# resize image, keep aspect ratio
h = input_image.size[0]
w = input_image.size[1]
ratio = h * 1.0 / w