def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", default=None, type=str,
                        help="input sentence for the demo chatbot")
    args = parser.parse_args()
    input_sentence = args.input

    tokenizer = tfds.features.text.SubwordTextEncoder.load_from_file(vocab_filename)

    # Vocabulary size plus start and end token
    VOCAB_SIZE = tokenizer.vocab_size + 2

    model = Transformer(num_layers=NUM_LAYERS,
                        units=UNITS,
                        d_model=D_MODEL,
                        num_heads=NUM_HEADS,
                        vocab_size=VOCAB_SIZE,
                        dropout=DROPOUT,
                        name='transformer')

    # Run a demo prediction first so the model variables are built
    # before the saved weights are restored.
    demo_sentence = 'How are you'
    predict(demo_sentence, tokenizer, model, True)

    model.load_weights(save_weight_path)
    model.summary()
    tf.keras.utils.plot_model(model, to_file='transformer.png', show_shapes=True)

    predict(input_sentence, tokenizer, model)
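# predict() is not defined in this snippet. A minimal greedy-decoding sketch of
# what it could look like for this model (an assumption, not the author's
# implementation; the start/end token ids follow the "+ 2" convention above,
# and the 40-step cap is illustrative):
def predict_sketch(sentence, tokenizer, model, max_length=40):
    start_token, end_token = [tokenizer.vocab_size], [tokenizer.vocab_size + 1]
    inputs = tf.expand_dims(start_token + tokenizer.encode(sentence) + end_token, 0)
    output = tf.expand_dims(start_token, 0)
    for _ in range(max_length):
        predictions = model(inputs=[inputs, output], training=False)
        # take the id with the highest logit at the last decoded position
        predicted_id = tf.cast(tf.argmax(predictions[:, -1:, :], axis=-1), tf.int32)
        if tf.squeeze(predicted_id) == end_token[0]:
            break
        output = tf.concat([output, predicted_id], axis=-1)
    ids = tf.squeeze(output, axis=0).numpy()
    return tokenizer.decode([i for i in ids if i < tokenizer.vocab_size])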
def main():
    Place = paddle.fluid.CUDAPlace(0)
    with fluid.dygraph.guard(Place):
        model = Transformer(image_size=512,
                            num_classes=15,
                            hidden_unit_num=1024,
                            layer_num=2,
                            head_num=16,
                            dropout=0.8,
                            decoder_name='PUP',
                            hyber=True,
                            visualable=False)
        preprocess = Transform(512)
        dataloader_1 = Dataloader('/home/aistudio/dataset',
                                  '/home/aistudio/dataset/val_list.txt',
                                  transform=preprocess,
                                  shuffle=True)
        val_load = fluid.io.DataLoader.from_generator(capacity=1, use_multiprocess=False)
        val_load.set_sample_generator(dataloader_1, batch_size=1, places=Place)

        model_dic, optic_dic = load_dygraph(
            "./output/SETR-NotZero-Epoch-2-Loss-0.161517-MIOU-0.325002")
        model.load_dict(model_dic)
        model.eval()

        # Optional inference over raw images (kept disabled):
        # result = get_infer_data("/home/aistudio/dataset/infer")
        # infer_load = Load_infer('/home/aistudio/dataset', result,
        #                         transform=preprocess, shuffle=False)
        # loader_infer = fluid.io.DataLoader.from_generator(capacity=1, use_multiprocess=False)
        # loader_infer.set_sample_generator(infer_load, batch_size=1, places=Place)
        # process_image(model, loader_infer, result)

        validation(val_load, model, 15)
def __init__(self):
    super(TMLU, self).__init__()
    self.global_step = 0

    # Data pipeline
    data_pipeline = Preprocess(cfg)
    self.train_dataset, self.val_dataset = data_pipeline.get_data()
    self.tokenizer_pt = data_pipeline.tokenizer_pt
    self.tokenizer_en = data_pipeline.tokenizer_en
    cfg.input_vocab_size = self.tokenizer_pt.vocab_size + 2
    cfg.target_vocab_size = self.tokenizer_en.vocab_size + 2

    # Model
    self.transformer = Transformer(cfg)

    # Optimizer
    learning_rate = CustomSchedule(cfg.d_model)
    self.optimizer = tf.keras.optimizers.Adam(learning_rate,
                                              beta_1=0.9,
                                              beta_2=0.98,
                                              epsilon=1e-9)

    # Loss and metrics
    self.loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')
    self.train_loss = tf.keras.metrics.Mean(name='train_loss')
    self.train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    # Build writers for logging
    self.build_writers()

    checkpoint_path = "./checkpoints/train"
    self.ckpt = tf.train.Checkpoint(transformer=self.transformer,
                                    optimizer=self.optimizer)
    self.ckpt_manager = tf.train.CheckpointManager(self.ckpt, checkpoint_path,
                                                   max_to_keep=5)
def train(args):
    vocab = data_utils.get_vocab(vocab_file=args.vocab_file,
                                 min_freq=args.min_vocab_freq)
    # vocab = {}
    # with open(args.vocab_file, mode='r') as infile:
    #     for line in infile:
    #         w, w_id = line.split('\t')
    #         vocab[w] = int(w_id)
    print('Vocab loaded...')
    print('VOCAB SIZE = ', len(vocab))

    if args.model_type == 'transformer':
        transformer = Transformer(args=args, vocab=vocab)
        transformer.train_generator()
    elif args.model_type == 'rnn':
        rnn_params = {'rec_cell': 'lstm',
                      'encoder_dim': 800,
                      'decoder_dim': 800,
                      'num_encoder_layers': 2,
                      'num_decoder_layers': 2}
        rnn = RNNSeq2Seq(args=args, rnn_params=rnn_params, vocab=vocab)
        # rnn.train()
        rnn.train_keras()
    elif args.model_type == 'han_rnn':
        han_rnn = HanRnnSeq2Seq(args=args, vocab=vocab)
        han_rnn.train()
    elif args.model_type == 'cnn':
        cnn = ConvSeq2Seq(args=args, vocab=vocab)
        cnn.train_keras()
    return
def __init__(self, config):
    self.config = config
    self.prepare_dataloaders(config['data'])

    # Alternative models (kept for reference):
    # self.model = MLP(config['MLP'])
    # self.model = MLP_3D(config['MLP'])
    # self.model = LSTM(config['LSTM'])
    self.model = Transformer(config['Trans'])
    print(self.model)

    self.model_name = config['train']['model_name']
    # os.makedirs also creates missing parent directories
    self.checkpoint_dir = './checkpoint_dir/{}/'.format(self.model_name)
    if not os.path.exists(self.checkpoint_dir):
        os.makedirs(self.checkpoint_dir)
    self.tb_log_dir = './tb_log/{}/'.format(self.model_name)
    if not os.path.exists(self.tb_log_dir):
        os.makedirs(self.tb_log_dir)

    self.optimal_metric = 100000
    self.cur_metric = 100000
    self.loss = nn.MSELoss()
    self.optim = optim.Adam(self.model.parameters(),
                            lr=self.config['train']['lr'],
                            betas=(0.5, 0.999))
def evaluate(args):
    label_map = load_label_map(args.dataset)
    n_classes = 50
    if args.dataset == "include":
        n_classes = 263

    if args.use_cnn:
        dataset = FeaturesDatset(
            features_dir=os.path.join(args.data_dir, f"{args.dataset}_test_features"),
            label_map=label_map,
            mode="test",
        )
    else:
        dataset = KeypointsDataset(
            keypoints_dir=os.path.join(args.data_dir, f"{args.dataset}_test_keypoints"),
            use_augs=False,
            label_map=label_map,
            mode="test",
            max_frame_len=169,
        )

    dataloader = data.DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )

    if args.model == "lstm":
        config = LstmConfig()
        if args.use_cnn:
            config.input_size = CnnConfig.output_dim
        model = LSTM(config=config, n_classes=n_classes)
    else:
        config = TransformerConfig(size=args.transformer_size)
        if args.use_cnn:
            config.input_size = CnnConfig.output_dim
        model = Transformer(config=config, n_classes=n_classes)
    model = model.to(device)

    if args.use_pretrained == "evaluate":
        model, _, _ = load_pretrained(args, n_classes, model)
        print("### Model loaded ###")
    else:
        exp_name = get_experiment_name(args)
        model_path = os.path.join(args.save_path, exp_name) + ".pth"
        ckpt = torch.load(model_path)
        model.load_state_dict(ckpt["model"])
        print("### Model loaded ###")

    test_loss, test_acc = validate(dataloader, model, device)
    print("Evaluation Results:")
    print(f"Loss: {test_loss}, Accuracy: {test_acc}")
def change_max_pos_embd(args, new_mpe_size, n_classes):
    config = TransformerConfig(size=args.transformer_size,
                               max_position_embeddings=new_mpe_size)
    if args.use_cnn:
        config.input_size = CnnConfig.output_dim
    model = Transformer(config=config, n_classes=n_classes)
    model = model.to(device)
    return model
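# Illustrative usage only (frame_count and the threshold are assumptions, not
# from the original code): rebuild the model when a clip exceeds the configured
# position budget, e.g. during variable-length inference.
if frame_count > 256:
    model = change_max_pos_embd(args, new_mpe_size=frame_count, n_classes=n_classes)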
def train(args):
    src_root = args.src_root
    sr = args.sample_rate
    dt = args.delta_time
    batch_size = args.batch_size
    model_type = args.model_type
    params = {'N_CLASSES': len(os.listdir(args.src_root)),
              'SR': sr,
              'DT': dt}
    models = {'conv1d': Conv1D(**params),
              'conv2d': Conv2D(**params),
              'lstm': LSTM(**params),
              'transformer': Transformer(**params),
              'ViT': ViT(**params)}
    assert model_type in models.keys(), '{} not an available model'.format(model_type)

    csv_path = os.path.join('logs', '{}_history.csv'.format(model_type))
    wav_paths = glob('{}/**'.format(src_root), recursive=True)
    wav_paths = [x.replace(os.sep, '/') for x in wav_paths if '.wav' in x]
    classes = sorted(os.listdir(args.src_root))
    le = LabelEncoder()
    le.fit(classes)
    labels = [os.path.split(x)[0].split('/')[-1] for x in wav_paths]
    labels = le.transform(labels)

    wav_train, wav_val, label_train, label_val = train_test_split(
        wav_paths, labels, test_size=0.1, random_state=0)

    assert len(label_train) >= args.batch_size, \
        'Number of train samples must be >= batch_size'
    if len(set(label_train)) != params['N_CLASSES']:
        warnings.warn('Found {}/{} classes in training data. '
                      'Increase data size or change random_state.'.format(
                          len(set(label_train)), params['N_CLASSES']))
    if len(set(label_val)) != params['N_CLASSES']:
        warnings.warn('Found {}/{} classes in validation data. '
                      'Increase data size or change random_state.'.format(
                          len(set(label_val)), params['N_CLASSES']))

    tg = DataGenerator(wav_train, label_train, sr, dt,
                       params['N_CLASSES'], batch_size=batch_size)
    vg = DataGenerator(wav_val, label_val, sr, dt,
                       params['N_CLASSES'], batch_size=batch_size)

    model = models[model_type]
    model.summary()
    cp = ModelCheckpoint('models/{}.h5'.format(model_type),
                         monitor='val_loss',
                         save_best_only=True,
                         save_weights_only=False,
                         mode='auto',
                         save_freq='epoch',
                         verbose=1)
    csv_logger = CSVLogger(csv_path, append=False)
    # include the checkpoint callback so the best model is actually saved
    # (it was constructed but never passed in the original)
    model.fit(tg, validation_data=vg,
              epochs=40, verbose=1,
              callbacks=[csv_logger, cp])
def __init__(self, voc_size_src, voc_size_tar, max_pe, num_encoders,
             num_decoders, emb_size, num_head, ff_inner=2048, p_dropout=0.1):
    super(mBART, self).__init__()
    self.transformer = Transformer.Transformer(voc_size_src, voc_size_tar,
                                               max_pe, num_encoders,
                                               num_decoders, emb_size,
                                               num_head, ff_inner, p_dropout)
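# A minimal instantiation sketch of the wrapper above; the hyperparameter
# values are illustrative assumptions, not the original training configuration
# (max_pe=10000 mirrors the other snippets in this collection):
model = mBART(voc_size_src=32000, voc_size_tar=32000, max_pe=10000,
              num_encoders=6, num_decoders=6, emb_size=512, num_head=8)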
def create_model_transformer_b2a(arg, devices_list, eval=False):
    from models import Transformer

    resume_dataset = arg.eval_dataset_transformer if eval else arg.dataset
    resume_b = arg.eval_split_source_trasformer if eval else arg.split_source
    resume_a = arg.eval_split_trasformer if eval else arg.split
    resume_epoch = arg.eval_epoch_transformer if eval else arg.resume_epoch

    transformer = Transformer(in_channels=boundary_num, out_channels=boundary_num)
    if resume_epoch > 0:
        load_path = (arg.resume_folder + 'transformer_' + resume_dataset + '_' +
                     resume_b + '2' + resume_a + '_' + str(resume_epoch) + '.pth')
        print('Loading Transformer from ' + load_path)
        transformer = load_weights(transformer, load_path, devices_list[0])
    else:
        init_weights(transformer, init_type='transformer')
        # init_weights(transformer)

    if arg.cuda:
        transformer = transformer.cuda(device=devices_list[0])

    return transformer
def __init__(self, config):
    super(MultiTaskNMT, self).__init__()
    self.config = config
    self.model1 = Transformer(config)
    self.model2 = Transformer(config)

    # Identify the language flags for training the model
    self.lang1 = find_key_from_val(config.id2w, config.lang1)
    self.lang2 = find_key_from_val(config.id2w, config.lang2)

    # Weight sharing between the two models
    # Embedding layer weight sharing
    self.model1.embed_word.weight = self.model2.embed_word.weight
    if config.pshare_decoder_param:
        self.model1.encoder = self.model2.encoder

    # Per-layer weight sharing in the transformer decoder
    for i in range(config.layers):
        if config.pshare_decoder_param:  # share decoder params
            # Share query projections
            if 'q' in config.share_sublayer:
                if "self" in config.attn_share:
                    self.model1.decoder.layers[i].self_attention.W_Q.weight = \
                        self.model2.decoder.layers[i].self_attention.W_Q.weight
                if "source" in config.attn_share:
                    self.model1.decoder.layers[i].source_attention.W_Q.weight = \
                        self.model2.decoder.layers[i].source_attention.W_Q.weight
            # Share key projections
            if 'k' in config.share_sublayer:
                if "self" in config.attn_share:
                    self.model1.decoder.layers[i].self_attention.W_K.weight = \
                        self.model2.decoder.layers[i].self_attention.W_K.weight
                if "source" in config.attn_share:
                    self.model1.decoder.layers[i].source_attention.W_K.weight = \
                        self.model2.decoder.layers[i].source_attention.W_K.weight
            # Share value projections
            if 'v' in config.share_sublayer:
                if "self" in config.attn_share:
                    self.model1.decoder.layers[i].self_attention.W_V.weight = \
                        self.model2.decoder.layers[i].self_attention.W_V.weight
                if "source" in config.attn_share:
                    self.model1.decoder.layers[i].source_attention.W_V.weight = \
                        self.model2.decoder.layers[i].source_attention.W_V.weight
            # Share the finishing linear layer
            if 'f' in config.share_sublayer:
                if "self" in config.attn_share:
                    self.model1.decoder.layers[i].self_attention.finishing_linear_layer.weight = \
                        self.model2.decoder.layers[i].self_attention.finishing_linear_layer.weight
                if "source" in config.attn_share:
                    self.model1.decoder.layers[i].source_attention.finishing_linear_layer.weight = \
                        self.model2.decoder.layers[i].source_attention.finishing_linear_layer.weight
            # Share LayerNorm
            self.model1.decoder.layers[i].ln_1 = self.model2.decoder.layers[i].ln_1
            self.model1.decoder.layers[i].ln_2 = self.model2.decoder.layers[i].ln_2
            # Share the feed-forward sublayer
            if 'linear' in config.share_sublayer:
                self.model1.decoder.layers[i].feed_forward = \
                    self.model2.decoder.layers[i].feed_forward
                self.model1.decoder.layers[i].ln_3 = \
                    self.model2.decoder.layers[i].ln_3
def main():
    questions, answers = load_conversations()

    # Build tokenizer using tfds for both questions and answers
    tokenizer = tfds.features.text.SubwordTextEncoder.build_from_corpus(
        questions + answers, target_vocab_size=2**13)
    tokenizer.save_to_file(vocab_filename)

    # Vocabulary size plus start and end token
    VOCAB_SIZE = tokenizer.vocab_size + 2

    questions, answers = tokenize_and_filter(questions, answers, tokenizer)
    print('Vocab size: {}'.format(VOCAB_SIZE))
    print('Number of samples: {}'.format(len(questions)))

    # decoder inputs use the previous target as input
    # remove START_TOKEN from targets
    dataset = tf.data.Dataset.from_tensor_slices((
        {'inputs': questions},
        {'outputs': answers},
    ))
    dataset = dataset.cache()
    dataset = dataset.shuffle(BUFFER_SIZE)
    dataset = dataset.batch(BATCH_SIZE)
    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
    print(dataset)

    model = Transformer(num_layers=NUM_LAYERS,
                        units=UNITS,
                        d_model=D_MODEL,
                        num_heads=NUM_HEADS,
                        vocab_size=VOCAB_SIZE,
                        dropout=DROPOUT,
                        name='transformer')

    learning_rate = CustomSchedule(D_MODEL)
    optimizer = tf.keras.optimizers.Adam(learning_rate,
                                         beta_1=0.9,
                                         beta_2=0.98,
                                         epsilon=1e-9)

    ckpt = tf.train.Checkpoint(model=model, optimizer=optimizer)
    ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5)

    # if a checkpoint exists, restore the latest checkpoint
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        print('Latest checkpoint restored!!')

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    for epoch in range(EPOCHS):
        start = time.time()
        train_loss.reset_states()
        train_accuracy.reset_states()

        for (batch, (inp, tar)) in enumerate(dataset):
            train_step(inp, tar, model, optimizer, train_loss, train_accuracy)
            if batch % 500 == 0:
                print('Epoch {} Batch {} Loss {:.4f} Accuracy {:.4f}'.format(
                    epoch + 1, batch, train_loss.result(), train_accuracy.result()))

        if (epoch + 1) % 5 == 0:
            ckpt_save_path = ckpt_manager.save()
            print('Saving checkpoint for epoch {} at {}'.format(
                epoch + 1, ckpt_save_path))

        print('Epoch {} Loss {:.4f} Accuracy {:.4f}'.format(
            epoch + 1, train_loss.result(), train_accuracy.result()))
        print('Time taken for 1 epoch: {} secs\n'.format(time.time() - start))

    model.save_weights(save_weight_path)
    # model.summary()

    input_sentence = 'Where have you been?'
    predict(input_sentence, tokenizer, model)

    sentence = 'I am not crazy, my mother had me tested.'
    for _ in range(5):
        sentence = predict(sentence, tokenizer, model)
        print('')
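# CustomSchedule is not defined in this snippet. A minimal sketch consistent
# with the warmup schedule defined elsewhere in this collection (an assumption,
# not necessarily the author's exact class):
class CustomSchedule(tf.keras.optimizers.schedules.LearningRateSchedule):

    def __init__(self, d_model, warmup_steps=4000):
        super(CustomSchedule, self).__init__()
        self.d_model = tf.cast(d_model, tf.float32)
        self.warmup_steps = warmup_steps

    def __call__(self, step):
        # lrate = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)
        return tf.math.rsqrt(self.d_model) * tf.math.minimum(
            tf.math.rsqrt(step), step * self.warmup_steps ** -1.5)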
    keypoints_dir=save_dir,
    max_frame_len=169,
)
dataloader = data.DataLoader(
    dataset,
    batch_size=1,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
)

# invert the label map: id -> label name
label_map = dict(zip(label_map.values(), label_map.keys()))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
config = TransformerConfig(size="large", max_position_embeddings=256)
model = Transformer(config=config, n_classes=263)
model = model.to(device)

# download the pretrained checkpoint if it is not present locally
pretrained_model_name = "include_no_cnn_transformer_large.pth"
pretrained_model_links = load_json("pretrained_links.json")
if not os.path.isfile(pretrained_model_name):
    link = pretrained_model_links[pretrained_model_name]
    torch.hub.download_url_to_file(link, pretrained_model_name, progress=True)

ckpt = torch.load(pretrained_model_name)
model.load_state_dict(ckpt["model"])
print("### Model loaded ###")

preds = inference(dataloader, model, device, label_map)
print(json.dumps(preds, indent=2))
no_up = 0
# Build the computation graph
print('---start graph---')
with tf.Graph().as_default():
    session_conf = tf.ConfigProto(allow_soft_placement=True,
                                  log_device_placement=False)
    session_conf.gpu_options.allow_growth = True
    # Cap per-process GPU memory usage
    session_conf.gpu_options.per_process_gpu_memory_fraction = 0.9
    # Create the session
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        transformer = Transformer(config, wordEmbedding)
        globalStep = tf.Variable(0, name="globalStep", trainable=False)
        # Define the optimizer, passing in the learning rate
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=config.training.learningRate, momentum=0.1)
        # Alternative optimizers (kept for reference):
        # optimizer = tf.train.AdamOptimizer(config.training.learningRate,
        #                                    beta1=0.9, beta2=0.999, epsilon=1e-08)
        # optimizer = tf.keras.optimizers.SGD(learning_rate=config.training.learningRate,
        #                                     momentum=0.1, nesterov=False)
        # optimizer = tf.train.GradientDescentOptimizer(learning_rate=config.training.learningRate)
        # optimizer = tf.train.RMSPropOptimizer(config.training.learningRate,
        #                                       decay=0.9, momentum=0.1, epsilon=1e-10)
        gradsAndVars = optimizer.compute_gradients(transformer.loss)
        '''
        mean_grad = tf.zeros(())
        for grad, var in gradsAndVars:
    word2id_fr = pickle.load(f)
with open('./data/europarl/word2id_en.pickle', 'rb') as f:
    word2id_en = pickle.load(f)
with open('./data/europarl/input_sentences.pickle', 'rb') as f:
    input_sentences = pickle.load(f)
with open('./data/europarl/output_sentences.pickle', 'rb') as f:
    output_sentences = pickle.load(f)

# 90/10 train/validation split
n = input_sentences.shape[0]
n_train = int(0.9 * n)
perm = np.random.permutation(n)
train_in = input_sentences[perm[0:n_train]]
train_out = output_sentences[perm[0:n_train]]
val_in = input_sentences[perm[n_train:n]].values
val_out = output_sentences[perm[n_train:n]].values

model = Transformer(in_voc=(id2word_fr, word2id_fr),
                    out_voc=(id2word_en, word2id_en),
                    hidden_size=50,
                    lr=1e-3,
                    batch_size=128,
                    beam_size=10,
                    nb_epochs=10,
                    nb_heads=4,
                    pos_enc=True,
                    nb_layers=1)
model.fit(train_in, train_out)
model.save("./model/transformer.ckpt")
# model.evaluate(valid_data=(val_in, val_out))  # requires nltk
# model parameters
dim_input = 6
output_sequence_length = Y_DAYS
dec_seq_len = Y_DAYS
dim_val = 64
dim_attn = 12
n_heads = 8
n_encoder_layers = 4
n_decoder_layers = 2

# paths
PATHS = crypto_data_paths()
MODEL_PATH = 'weights/trans/stock/3days/{e}_{d}_{v}_{y}_seed{seed}'.format(
    e=n_encoder_layers, d=n_decoder_layers, v=dim_val, y=Y_DAYS, seed=SEED)

# init network
net = Transformer(dim_val, dim_attn, dim_input, dec_seq_len,
                  output_sequence_length, n_decoder_layers,
                  n_encoder_layers, n_heads)
# net.load_state_dict(torch.load(MODEL_PATH))

# load the dataset
X_train, y_train, X_test, y_test = create_input_data(PATHS, N_LAGS, Y_DAYS)
train_dataset = StockDataset(X_train, y_train)
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE)
test_dataset = StockDataset(X_test, y_test)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE)

train(net, N_EPOCHS, train_loader, LR, MODEL_PATH)
eval(net, MODEL_PATH, test_loader)

# tensorboard --logdir=runs
# The MSE is 0.0010176260894923762
help="Clipped gradient norm") parser.add_argument("--model_no", type=int, default=0, help="Model ID") parser.add_argument("--num_epochs", type=int, default=350, help="No of epochs") args = parser.parse_args() train_iter, FR, EN, train_length = load_dataloaders(args) src_vocab = len(EN.vocab) trg_vocab = len(FR.vocab) cuda = torch.cuda.is_available() net = Transformer(src_vocab=src_vocab, trg_vocab=trg_vocab, d_model=args.d_model, num=args.num, n_heads=args.n_heads) for p in net.parameters(): if p.dim() > 1: nn.init.xavier_uniform_(p) criterion = nn.CrossEntropyLoss(reduction="mean", ignore_index=1) optimizer = optim.Adam(net.parameters(), lr=args.lr, betas=(0.9, 0.98), eps=1e-9) #scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10,20,30,40,50,100,200], gamma=0.7) scheduler = CosineWithRestarts(optimizer, T_max=500) if cuda: net.cuda() start_epoch, acc = load_state(net,
from pydub import AudioSegment
from models import Transformer
from utils import tokenizer, parse_wav, generate_input
import tensorflow as tf
import numpy as np

# Positional args: num_layers=2, d_model=512, num_heads=8, dff=2048,
# target_vocab_size=4337 (matching the training script for this checkpoint).
transformer = Transformer(2, 512, 8, 2048, 4337,
                          pe_input=4096,
                          pe_target=512,
                          rate=0.1,
                          training=False)
checkpoint_path = "results/asr-transfer/2/checkpoint-{:02d}".format(37)
transformer.load_weights(checkpoint_path)


def pre_process(wav_file):
    log_bank = parse_wav(wav_file)
    input, mask, target = generate_input(log_bank)
    return input, mask, target


def post_process(predicted_ids):
    y_predict = u"".join([tokenizer.id2token(id) for id in predicted_ids])
    return y_predict


def trans_mp3_to_wav(filepath):
                  batch_size_fn=utils.batch_size_fn,
                  train=True)
val_iter = MyIterator(val,
                      batch_size=ARGS.batch_size,
                      device=ARGS.device,
                      repeat=False,
                      sort_key=lambda x: (len(x.src), len(x.trg)),
                      batch_size_fn=utils.batch_size_fn,
                      train=False)
print('Done get dataset')

# model
model = Transformer(len(SRC.vocab),
                    len(TGT.vocab),
                    N=ARGS.n_layers,
                    d_model=ARGS.d_model,
                    d_ff=4 * ARGS.d_model,
                    h=ARGS.n_heads,
                    dropout=ARGS.p_dropout).to(ARGS.device)
criterion = LabelSmoothing(size=len(TGT.vocab),
                           padding_idx=pad_idx,
                           smoothing=0.1).to(ARGS.device)

# train
if ARGS.run_mode == 'train':
    optimizer = NoamOpt(
        ARGS.d_model, 1, 2000,
        torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))
    iter_cnt = 1
    min_norm_val_loss = math.inf
def main(argv):
    # Creating dataloaders for training and validation
    logging.info("Creating the source dataloader from: %s" % FLAGS.source)
    logging.info("Creating the target dataloader from: %s" % FLAGS.target)
    train_dataset, valid_dataset, src_tokenizer, \
        tar_tokenizer, size_train, size_val = prepare_training_pairs(
            FLAGS.source,
            FLAGS.target,
            FLAGS.syn_src,
            FLAGS.syn_tar,
            batch_size=FLAGS.batch_size,
            valid_ratio=0.1,
            name="ENFR")

    # calculate vocabulary sizes
    src_vocsize = len(src_tokenizer.word_index) + 1
    tar_vocsize = len(tar_tokenizer.word_index) + 1

    # ----------------------------------------------------------------------------------
    # Creating the instance of the model specified.
    logging.info("Create Transformer Model")
    optimizer = tf.keras.optimizers.Adam()
    model = Transformer.Transformer(voc_size_src=src_vocsize,
                                    voc_size_tar=tar_vocsize,
                                    max_pe=10000,
                                    num_encoders=FLAGS.num_enc,
                                    num_decoders=FLAGS.num_dec,
                                    emb_size=FLAGS.emb_size,
                                    num_head=FLAGS.num_head,
                                    ff_inner=FLAGS.ffnn_dim)

    # load pretrained mBART weights if requested
    if FLAGS.load_mBart:
        print("Loading pretrained mBART...")
        mbart_ckpt_dir = FLAGS.mbartckpt
        latest = tf.train.latest_checkpoint(mbart_ckpt_dir)
        checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
        status = checkpoint.restore(latest)
        status.assert_existing_objects_matched()

    # ----------------------------------------------------------------------------------
    # Choose the optimizer, loss function, and metrics.
    # Custom learning rate schedule (warmup then inverse square-root decay).
    class transformer_lr_schedule(
            tf.keras.optimizers.schedules.LearningRateSchedule):

        def __init__(self, emb_size, warmup_steps=4000):
            super(transformer_lr_schedule, self).__init__()
            self.emb_size = tf.cast(emb_size, tf.float32)
            self.warmup_steps = warmup_steps

        def __call__(self, step):
            lr_option1 = tf.math.rsqrt(step)
            lr_option2 = step * (self.warmup_steps**-1.5)
            return tf.math.rsqrt(self.emb_size) * tf.math.minimum(
                lr_option1, lr_option2)

    learning_rate = transformer_lr_schedule(FLAGS.emb_size)
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate,
                                         beta_1=0.9,
                                         beta_2=0.98,
                                         epsilon=1e-9)

    # SparseCategoricalCrossentropy lets the labels stay as integer ids
    # instead of one-hot vectors over the whole vocabulary.
    criterion = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True,
                                                              reduction='none')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
        name='train_accuracy')

    def loss_fn(label, pred):
        """The criterion above computes a per-token loss over the whole
        vocabulary; mask out the padding positions so they do not
        contribute to the average."""
        mask = tf.math.logical_not(tf.math.equal(label, 0))
        loss = criterion(label, pred)
        # convert the mask from bool to the loss dtype
        mask = tf.cast(mask, dtype=loss.dtype)
        loss *= mask
        return tf.reduce_sum(loss) / tf.reduce_sum(mask)

    # ----------------------------------------------------------------------------------
    # train/valid functions
    # The fixed input signature keeps tf.function from retracing the graph
    # for every new batch shape.
    train_step_signature = [
        tf.TensorSpec(shape=[None, None], dtype=tf.int32),
        tf.TensorSpec(shape=[None, None], dtype=tf.int32)
    ]

    @tf.function(input_signature=train_step_signature)
    def train_step(inp, targ):
        tar_inp = targ[:, :-1]
        tar_real = targ[:, 1:]
        # zero out the <end> token in the decoder input
        end = tf.cast(
            tf.math.logical_not(
                tf.math.equal(tar_inp, tar_tokenizer.word_index['<end>'])),
            tf.int32)
        tar_inp *= end
        # tf.print("tar inp", tar_inp)
        # tf.print("tar real", tar_real)

        # create masks
        enc_padding_mask = Transformer.create_padding_mask(inp)
        # mask for the first attention block in the decoder
        look_ahead_mask = Transformer.create_seq_mask(tf.shape(tar_inp)[1])
        dec_target_padding_mask = Transformer.create_padding_mask(tar_inp)
        combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)
        # mask for the "enc_dec" multi-head attention
        dec_padding_mask = Transformer.create_padding_mask(inp)

        with tf.GradientTape() as tape:
            # feed input into the encoder
            predictions = model(inp, tar_inp, True, enc_padding_mask,
                                combined_mask, dec_padding_mask)
            train_loss = loss_fn(tar_real, predictions)

        # optimize step
        gradients = tape.gradient(train_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
        return train_loss

    @tf.function(input_signature=train_step_signature)
    def valid_step(inp, targ):
        tar_inp = targ[:, :-1]
        tar_real = targ[:, 1:]
        end = tf.cast(
            tf.math.logical_not(
                tf.math.equal(tar_inp, tar_tokenizer.word_index['<end>'])),
            tf.int32)
        tar_inp *= end

        # create masks
        enc_padding_mask = Transformer.create_padding_mask(inp)
        # mask for the first attention block in the decoder
        look_ahead_mask = Transformer.create_seq_mask(tf.shape(tar_inp)[1])
        dec_target_padding_mask = Transformer.create_padding_mask(tar_inp)
        combined_mask = tf.maximum(dec_target_padding_mask, look_ahead_mask)
        # mask for the "enc_dec" multi-head attention
        dec_padding_mask = Transformer.create_padding_mask(inp)

        # feed input into the encoder (training=False)
        predictions = model(inp, tar_inp, False, enc_padding_mask,
                            combined_mask, dec_padding_mask)
        val_loss = loss_fn(tar_real, predictions)
        train_accuracy(tar_real, predictions)
        return val_loss

    # ----------------------------------------------------------------------------------
    # Set up checkpoints, so training can resume after an interruption.
    ckpt_prefix = os.path.join(FLAGS.ckpt, "ckpt_BT_ENFR_transformer")
    ckpt = tf.train.Checkpoint(optimizer=optimizer, model=model)
    manager = tf.train.CheckpointManager(ckpt, directory=FLAGS.ckpt,
                                         max_to_keep=2)

    # restore from the latest checkpoint, unless mBART weights were loaded above
    if not FLAGS.load_mBart:
        print("Load previous checkpoints...")
        status = ckpt.restore(manager.latest_checkpoint)
        if manager.latest_checkpoint:
            logging.info("Restored from {}".format(manager.latest_checkpoint))
            status.assert_existing_objects_matched()
        else:
            logging.info("Initializing from scratch.")

    # ----------------------------------------------------------------------------------
    # Set up TensorBoard for better visualization
    logging.info("Setup the TensorBoard...")
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    train_log_dir = './logs/gradient_tape/' + current_time + '/BT_ENFR_transformer_train'
    test_log_dir = './logs/gradient_tape/' + current_time + '/BT_ENFR_transformer_test'
    train_summary_writer = tf.summary.create_file_writer(train_log_dir)
    test_summary_writer = tf.summary.create_file_writer(test_log_dir)

    # ----------------------------------------------------------------------------------
    # Start the training process
    EPOCHS = FLAGS.epochs
    for epoch in range(EPOCHS):
        start = time.time()
        total_train_loss = 0.
        total_val_loss = 0.
        # train
        for (inp, targ) in train_dataset:
            train_loss = train_step(inp, targ)
            total_train_loss += train_loss

        # save a checkpoint every 5 epochs
        if (epoch + 1) % 5 == 0:
            ckpt.save(file_prefix=ckpt_prefix)

        # validation
        for (inp, tar) in valid_dataset:
            val_loss = valid_step(inp, tar)
            total_val_loss += val_loss

        # average losses over the number of batches
        total_train_loss /= (size_train / FLAGS.batch_size)
        total_val_loss /= (size_val / FLAGS.batch_size)

        # write losses to TensorBoard
        with train_summary_writer.as_default():
            tf.summary.scalar('Train loss', total_train_loss, step=epoch)
        with test_summary_writer.as_default():
            tf.summary.scalar('Valid loss', total_val_loss, step=epoch)

        logging.info(
            'Epoch {} Train Loss {:.4f} Valid loss {:.4f} Valid Accuracy {:.4f}'
            .format(epoch + 1, total_train_loss, total_val_loss,
                    train_accuracy.result()))
        logging.info(
            'Time taken for 1 epoch: {} sec\n'.format(time.time() - start))
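# A toy sanity check of the masking helpers used in train_step/valid_step above
# (exact tensor shapes depend on the Transformer module's implementation;
# illustrative only):
import tensorflow as tf
from models import Transformer  # assumed import path, as in the other snippets

inp = tf.constant([[5, 3, 0, 0]])                 # id 0 is padding
pad_mask = Transformer.create_padding_mask(inp)   # marks padding positions
seq_mask = Transformer.create_seq_mask(4)         # hides future positions
combined = tf.maximum(pad_mask, seq_mask)         # blocks padding AND future
print(combined)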
    maxlen = int(sys.argv[1])
else:
    maxlen = 16

args = subword_batches(zip(in_texts, tar_texts), maxlen)
args = subword_batches(zip(in_valid, tar_valid), maxlen, args)
dg = args[params.train_generator]
vdg = args[params.valid_generator]
input_vocab_size = args[params.input_vocab_size]
target_vocab_size = args[params.target_vocab_size]

transformer = Transformer(num_layers, d_model, num_heads, dff,
                          input_vocab_size, target_vocab_size,
                          pe_input=input_vocab_size,
                          pe_target=target_vocab_size,
                          target_len=args[params.valid_seq_len],
                          rate=dropout_rate)
transformer.compile(optimizer=optimizer,
                    loss=loss_function,
                    metrics=[accuracy_function, bleu_score])

history = transformer.fit(dg, epochs=params.epochs, validation_data=vdg)
transformer.save_weights(weights_dir + '/w' + str(maxlen) + '_ex' + str(params.train_size))
json.dump(
    history.history,
    open(history_dir + '/h' + str(maxlen) + '_ex' + str(params.train_size),
# embeddings are attributes
if args.dataset_name == 'aids':
    EMBED_DIM = 4
    num_classes = 2
    num_heads = 8
    depth = 6
    p, q = 1, 1
elif args.dataset_name == 'coildel':
    EMBED_DIM = 2
    num_classes = 100
    num_heads = 8
    depth = 6
    p, q = 1, 1

# k, num_heads, depth, seq_length, num_tokens, num_
model = Transformer(EMBED_DIM, num_heads, test_dataset.walklength,
                    depth, num_classes).to(device)

lr_warmup = 10000
lr = 1e-3
opt = torch.optim.Adam(lr=lr, params=model.parameters())
# linearly ramp the learning rate up over the first lr_warmup samples
sch = torch.optim.lr_scheduler.LambdaLR(
    opt, lambda i: min(i / (lr_warmup / args.batch_size), 1.0))
loss_func = nn.NLLLoss()


def train_validate(model, loader, opt, loss_func, train, device):
    if train:
        model.train()
def fit(args):
    exp_name = get_experiment_name(args)
    logging_path = os.path.join(args.save_path, exp_name) + ".log"
    logging.basicConfig(filename=logging_path,
                        level=logging.INFO,
                        format="%(message)s")
    seed_everything(args.seed)
    label_map = load_label_map(args.dataset)

    if args.use_cnn:
        train_dataset = FeaturesDatset(
            features_dir=os.path.join(args.data_dir, f"{args.dataset}_train_features"),
            label_map=label_map,
            mode="train",
        )
        val_dataset = FeaturesDatset(
            features_dir=os.path.join(args.data_dir, f"{args.dataset}_val_features"),
            label_map=label_map,
            mode="val",
        )
    else:
        train_dataset = KeypointsDataset(
            keypoints_dir=os.path.join(args.data_dir, f"{args.dataset}_train_keypoints"),
            use_augs=args.use_augs,
            label_map=label_map,
            mode="train",
            max_frame_len=169,
        )
        val_dataset = KeypointsDataset(
            keypoints_dir=os.path.join(args.data_dir, f"{args.dataset}_val_keypoints"),
            use_augs=False,
            label_map=label_map,
            mode="val",
            max_frame_len=169,
        )

    train_dataloader = data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True,
    )
    val_dataloader = data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )

    n_classes = 50
    if args.dataset == "include":
        n_classes = 263

    if args.model == "lstm":
        config = LstmConfig()
        if args.use_cnn:
            config.input_size = CnnConfig.output_dim
        model = LSTM(config=config, n_classes=n_classes)
    else:
        config = TransformerConfig(size=args.transformer_size)
        if args.use_cnn:
            config.input_size = CnnConfig.output_dim
        model = Transformer(config=config, n_classes=n_classes)
    model = model.to(device)

    optimizer = torch.optim.AdamW(model.parameters(),
                                  lr=args.learning_rate,
                                  weight_decay=0.01)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           mode="max",
                                                           factor=0.2)
    if args.use_pretrained == "resume_training":
        model, optimizer, scheduler = load_pretrained(args, n_classes, model,
                                                      optimizer, scheduler)

    model_path = os.path.join(args.save_path, exp_name) + ".pth"
    es = EarlyStopping(patience=15, mode="max")
    for epoch in range(args.epochs):
        print(f"Epoch: {epoch+1}/{args.epochs}")
        train_loss, train_acc = train(train_dataloader, model, optimizer, device)
        val_loss, val_acc = validate(val_dataloader, model, device)
        logging.info(
            "Epoch: {}, train loss: {}, train acc: {}, val loss: {}, val acc: {}"
            .format(epoch + 1, train_loss, train_acc, val_loss, val_acc))
        scheduler.step(val_acc)
        es(
            model_path=model_path,
            epoch_score=val_acc,
            model=model,
            optimizer=optimizer,
            scheduler=scheduler,
        )
        if es.early_stop:
            print("Early stopping")
            break

    print("### Training Complete ###")
steps_per_epoch = train_data_size // batch_size
warmup_steps = int(epochs * train_data_size * 0.1 / batch_size)
eval_steps = dev_data_size // batch_size * 10

# optimizer = tf.keras.optimizers.Adam(learning_rate, beta_1=0.9, beta_2=0.98, epsilon=1e-9)
optimizer = get_optimizer(learning_rate, steps_per_epoch, epochs, warmup_steps)
callbacks = get_callbacks(model_dir)

transformer = Transformer(num_layers, d_model, num_heads, dff,
                          target_vocab_size,
                          pe_input=pe_input,
                          pe_target=pe_target,
                          rate=dropout_rate,
                          training=train)
transformer.compile(optimizer=optimizer,
                    loss=loss_function,
                    metrics=[metric_fn()])

# warm-start from a previously trained checkpoint
checkpoint_path = "results/asr-transfer/2/checkpoint-{:02d}".format(37)
transformer.load_weights(checkpoint_path)

transformer.fit(
    train_data,
    # validation_data=dev_data,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
def main():
    device = torch.device("cpu" if hparams.no_cuda else "cuda")

    print("=== build model ===")
    start = time.time()
    model = Transformer(hparams.d_model, hparams.d_ff, vocab_size,
                        hparams.num_heads, hparams.num_layers, hparams.max_len,
                        hparams.dropout, EOS_id, PAD_id, device).to(device)
    end = time.time()
    print("=== build model done === {} seconds".format(end - start))

    train.global_step = 0
    # train_dataset, val_dataset = split_data(train_path_en, train_path_de,
    #                                         hparams.validation_rate)
    train_dataset = make_dataset(train_path_en, train_path_de)
    val_dataset = make_dataset(val_path_en, val_path_de)
    train_loader = DataLoader(train_dataset,
                              batch_size=hparams.batch_size,
                              collate_fn=custom_collate,
                              shuffle=True,
                              num_workers=hparams.num_workers)
    val_loader = DataLoader(val_dataset,
                            batch_size=hparams.batch_size,
                            collate_fn=custom_collate,
                            num_workers=hparams.num_workers)

    criterion = torch.nn.NLLLoss(ignore_index=PAD_id, reduction="sum").to(device)
    optimizer = torch.optim.Adam(model.parameters(), hparams.lr)
    writer = SummaryWriter()

    for epoch in range(hparams.max_epochs):
        # train
        print("=== train start ===")
        start = time.time()
        loss, bleu_score = train(model, train_loader, criterion, optimizer,
                                 device, writer, epoch, hparams.print_steps)
        end = time.time()
        print("=== train done === {} seconds".format(end - start))
        print("epoch: {}/{}, loss: {}, bleu score: {}".format(
            epoch + 1, hparams.max_epochs, loss, bleu_score))
        torch.save(model.state_dict(), save_path)
        print("model saved to '{}'".format(os.path.abspath(save_path)))
        writer.add_scalar("Loss/train", loss, epoch + 1)
        writer.add_scalar("Bleu score/train", bleu_score, epoch + 1)

        # evaluation
        print("=== evaluation start ===")
        start = time.time()
        loss, bleu_score = evaluate(model, val_loader, criterion, optimizer,
                                    device, writer)
        end = time.time()
        print("=== evaluation done === {} seconds".format(end - start))
        print("epoch: {}/{}, loss: {}, bleu score: {}".format(
            epoch + 1, hparams.max_epochs, loss, bleu_score))
        writer.add_scalar("Loss/eval", loss, epoch + 1)
        writer.add_scalar("Bleu score/eval", bleu_score, epoch + 1)
#print(df[df["T (degC)"] < 0]["T (degC)"]) df = df.drop(["max. wv (m/s)", "wv (m/s)"], axis=1) day = 24 * 60 * 60 year = (365.2425) * day timestamp_s = date_time.map(datetime.datetime.timestamp) df['Day sin'] = np.sin(timestamp_s * (2 * np.pi / day)) df['Day cos'] = np.cos(timestamp_s * (2 * np.pi / day)) df['Year sin'] = np.sin(timestamp_s * (2 * np.pi / year)) df['Year cos'] = np.cos(timestamp_s * (2 * np.pi / year)) print(df.head(20)) # モデルのインスタンスを作成 model = Transformer() #model = LSTMM() loss_object = tf.keras.losses.BinaryCrossentropy() optimizer = tf.keras.optimizers.Adam(0.01) # train_loss = tf.keras.metrics.Mean(name='train_loss') train_accuracy = tf.keras.metrics.BinaryAccuracy() test_loss = tf.keras.metrics.Mean(name='test_loss') test_accuracy = tf.keras.metrics.BinaryAccuracy() @tf.function def train_step(seq, labels): with tf.GradientTape() as tape: predictions = model(seq) tf.print(predictions[0])
def generate_predictions(ckpt, path_src, path_tar, input_file_path: str,
                         pred_file_path: str, num_sync):
    """Generates predictions for the machine translation task (EN->FR).

    You are allowed to modify this function as needed, but once again, you
    cannot modify any other part of this file. We will be importing only this
    function in our final evaluation script. Since you will most definitely
    need to import modules for your code, you must import these inside the
    function itself.

    Args:
        input_file_path: the file path that contains the input data.
        pred_file_path: the file path where to store the predictions.

    Returns: None
    """
    # load input file => create test dataloader => (spm encode)
    from data.dataloaders import prepare_test

    BATCH_SIZE = 128
    TOTAL_ITER = int(num_sync / 128)

    # load the training tokenizers so the test data is tokenized the same way
    import pickle
    f_src = open(path_src, 'rb')
    f_tar = open(path_tar, 'rb')
    src_tokenizer = pickle.load(f_src)
    tar_tokenizer = pickle.load(f_tar)
    test_dataset, test_max_length = prepare_test(input_file_path,
                                                 src_tokenizer,
                                                 batch_size=BATCH_SIZE)

    # create model
    from models import Transformer
    import tensorflow as tf
    src_vocsize = len(src_tokenizer.word_index) + 1
    tar_vocsize = len(tar_tokenizer.word_index) + 1

    # create model instance
    optimizer = tf.keras.optimizers.Adam()
    model = Transformer.Transformer(voc_size_src=src_vocsize,
                                    voc_size_tar=tar_vocsize,
                                    max_pe=10000,
                                    num_encoders=4,
                                    num_decoders=4,
                                    emb_size=512,
                                    num_head=8,
                                    ff_inner=1024)

    # load checkpoint
    ckpt_dir = ckpt
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
    status = checkpoint.restore(tf.train.latest_checkpoint(ckpt_dir))
    status.assert_existing_objects_matched()

    # greedy search / beam search; write results to pred_file_path
    import time
    from translate import translate_batch
    start = time.time()
    count = 0
    with open(pred_file_path, 'w', encoding='utf-8') as pred_file:
        for (batch, (inp)) in enumerate(test_dataset):
            print("Evaluating Batch: %s" % batch)
            batch_size = tf.shape(inp)[0].numpy()
            translation = translate_batch(model, inp, batch_size, tar_tokenizer)
            for sentence in translation:
                pred_file.write(sentence.strip() + '\n')
            pred_file.flush()
            count += 1
            if count > TOTAL_ITER:
                break
    end = time.time()
    print("Translation finish in %s s" % (end - start))
print(src_tokenizer.index_word)
src_vocsize = len(src_tokenizer.word_index) + 1
tar_vocsize = len(tar_tokenizer.word_index) + 1
print("Source Language voc size: %s" % src_vocsize)
print("Target Language voc size: %s" % tar_vocsize)
print("Source Language max length: %s" % source_max_length)
print("Target Language max length: %s" % target_max_length)

model = Transformer.Transformer(voc_size_src=src_vocsize,
                                voc_size_tar=tar_vocsize,
                                src_max_length=source_max_length,
                                tar_max_length=target_max_length,
                                num_encoders=1,
                                num_decoders=1,
                                emb_size=8,
                                num_head=2,
                                ff_inner=1024)

tf.random.set_seed(12)
for src, tar in train_dataset:
    # create mask
    enc_padding_mask = Transformer.create_padding_mask(src)
    print("enc_padding_mask", enc_padding_mask.numpy())
    # mask for the first attention block in the decoder
    look_ahead_mask = Transformer.create_seq_mask(target_max_length)
    print("look ahead mask", look_ahead_mask.numpy())
    dec_target_padding_mask = Transformer.create_padding_mask(tar)
plt.ylabel("Learning Rate") plt.xlabel("Train Step") # Loss and Metrics # ------------------------------------------------------------------------------------------------------- print("Loss and Metrics\n------------------------------------------") loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none') train_loss = tf.keras.metrics.Mean(name='train_loss') train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy') # Training and Checkpoints # ------------------------------------------------------------------------------------------------------- print("Training and Checkpoints\n------------------------------------------") transformer = Transformer(num_layers, d_model, num_heads, dff, input_vocab_size, target_vocab_size, dropout_rate) checkpoint_path = "./checkpoints/train" ckpt = tf.train.Checkpoint(transformer=transformer, optimizer=optimizer) ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=5) # if a checkpoint exists, restore the latest checkpoint. if ckpt_manager.latest_checkpoint: ckpt.restore(ckpt_manager.latest_checkpoint) print ('Latest checkpoint restored!!') for epoch in range(EPOCHS): start = time.time()
def generate_predictions(input_file_path: str, pred_file_path: str):
    """Generates predictions for the machine translation task (EN->FR).

    You are allowed to modify this function as needed, but once again, you
    cannot modify any other part of this file. We will be importing only this
    function in our final evaluation script. Since you will most definitely
    need to import modules for your code, you must import these inside the
    function itself.

    Args:
        input_file_path: the file path that contains the input data.
        pred_file_path: the file path where to store the predictions.

    Returns: None
    """
    # ---------------------------------------------------------------------
    # Include essential modules for evaluation
    import os
    import json
    import pickle
    import time
    import tensorflow as tf
    from data.dataloaders import prepare_training_pairs, prepare_test
    from models import Transformer
    from translate import translate_batch
    from definition import ROOT_DIR

    CONFIG = "eval_cfg.json"

    # ---------------------------------------------------------------------
    # Load the settings from the json file
    with open(os.path.join(ROOT_DIR, CONFIG)) as f:
        para = json.load(f)
    batch_size = para["batch_size"]
    source = para["src"]
    target = para["tar"]
    ckpt_dir = para["ckpt"]

    # ---------------------------------------------------------------------
    # Create a test dataloader from the input file (tokenized and mapped to
    # sequences).
    # Todo: The final source and target tokenizers are needed, so as to apply
    # the same tokenization to the test data, because we didn't build a
    # dictionary file.
    f_src = open(source, 'rb')
    f_tar = open(target, 'rb')
    src_tokenizer = pickle.load(f_src)
    tar_tokenizer = pickle.load(f_tar)
    test_dataset, test_max_length = prepare_test(input_file_path,
                                                 src_tokenizer,
                                                 batch_size=batch_size)

    # calculate vocabulary sizes
    src_vocsize = len(src_tokenizer.word_index) + 1
    tar_vocsize = len(tar_tokenizer.word_index) + 1

    # ---------------------------------------------------------------------
    # Create the model instance to load checkpoints into.
    # Todo: Define the model that fits the checkpoints you want to load.
    optimizer = tf.keras.optimizers.Adam()
    model = Transformer.Transformer(voc_size_src=src_vocsize,
                                    voc_size_tar=tar_vocsize,
                                    max_pe=10000,
                                    num_encoders=4,
                                    num_decoders=4,
                                    emb_size=512,
                                    num_head=8,
                                    ff_inner=1024)

    # ---------------------------------------------------------------------
    # Load checkpoint
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
    status = checkpoint.restore(tf.train.latest_checkpoint(ckpt_dir))
    # check that loading was successful
    status.assert_existing_objects_matched()

    # ---------------------------------------------------------------------
    # Use greedy search to generate predictions and write them to pred_file_path
    start = time.time()
    with open(pred_file_path, 'w', encoding='utf-8') as pred_file:
        for (batch, (inp)) in enumerate(test_dataset):
            if batch % 5 == 0:
                print("Evaluating Batch: %s" % batch)
            batch_size = tf.shape(inp)[0].numpy()
            translation = translate_batch(model, inp, batch_size, tar_tokenizer)
            for sentence in translation:
                pred_file.write(sentence.strip() + '\n')
    end = time.time()
    print("Translation finish in %s s" % (end - start))