#print("bounds") #print(upper_confidence_bound(np.asarray([[0.00617284, 0.48765432]]))) min_val = scipydirect.minimize(neg_upper_confidence_bound,bounds) xval = min_val.x acc_targets = multi_fid_values['accuracy_targets']+[0.0] out_fid_level = num_fidelities-1# defaults to highest fidelity function for fid_level,(acc,reg) in enumerate(zip(acc_targets,regressors)): mean,stdev = reg.predict([min_val.x], return_std=True) if stdev*beta > acc: out_fid_level = fid_level break yval = -neg_upper_confidence_bound([xval]) return xval,yval,out_fid_level if __name__ == "__main__": assert len(sys.argv) == 2 , "needs one parameter, the data filename." data = json.load(open(sys.argv[1])) trans = Transformer(data) #ys,xs = parse_data(data,trans) #bounds = trans.get_bounds() #print(xs) #print(ys) #print(bounds) res = next_point(data,trans) print(res) inv_res = trans.inverse_point(res[0]) print(inv_res)
def encode_char(c): return ord(c) - 32 def encode_smiles(string, start_char=EXTRA_CHARS['seq_start']): return torch.tensor([ord(start_char)] + [encode_char(c) for c in string], dtype=torch.long)[:args.max_length].unsqueeze(0) smiles_strings = [line.strip("\n") for line in open(args.data_path, "r")] print("Loaded {0} SMILES strings from {1}".format(len(smiles_strings), args.data_path)) print("Initializing Transformer...") model = Transformer(ALPHABET_SIZE, args.embedding_size, args.num_layers).eval() model = torch.nn.DataParallel(model) print("Transformer Initialized.") print("Loading pretrained weights from", args.checkpoint_path) checkpoint = torch.load(args.checkpoint_path, map_location=torch.device("cpu")) model.load_state_dict(checkpoint['state_dict']) print("Pretrained weights loaded") model = model.module.cpu() encoder = model.encoder.cpu() embeddings = [] with torch.no_grad(): for smiles in smiles_strings: encoded = encode_smiles(smiles) mask = create_masks(encoded)
class_truth = torch.empty(B_hat.shape[0], dtype=torch.long).fill_(class1_label).cpu() loss_cls_val = loss_cls(class_prediction, class_truth) loss = loss_mse_val + wt*loss_cls_val optimizer.zero_grad() loss.backward() optimizer.step() print('[epoch %d/%d] loss: %f, mse_loss: %f, cls_loss: %f'%(epoch+1, n_epoch, loss.item(), loss_mse_val.item(), loss_cls_val.item())) torch.save({'state_dict':transformer.state_dict(), 'optimizer': optimizer.state_dict()}, save_model_path+'transformer_%d.pth'%(class1_label)) if __name__ == '__main__': feature_dim = 512 n_classes = 80 classes_centroids_npy = 'classes_centroids.npy' classes_centroids = np.load(classes_centroids_npy) ResNet_model_path = 'Res_trained_39.pkl' my_transformer = Transformer(feature_dim) # my_classifier = ResNetFeat.ResNet18(num_classes=80) # my_classifier.load_state_dict(torch.load(ResNet_model_path)) my_classifier = torch.load(ResNet_model_path) optimizer = torch.optim.Adam(my_transformer.parameters(), lr=1e-4) # print(my_classifier) for i in range(n_classes): for j in range(i+1, n_classes): class1_centroids = classes_centroids[i] class2_centroids = classes_centroids[j] train(my_transformer, my_classifier, optimizer, class1_centroids, class2_centroids, i)
from torch.utils.data import DataLoader from transformer import Transformer, Config from data.dataset import CorpusDataset, TokenSentenceConverter import tqdm from evaluation.translate import translate_batch from config import * model = Transformer(Config(model_config)) model.load_state_dict(torch.load('model_state_dict/5epoch/transformer.pkl')) model.cuda() model.eval() batch_size = 25 converter = TokenSentenceConverter('data/vocab.pkl') dataset = CorpusDataset('data/corpus/test_en', 'data/corpus/test_cn', converter, to_token=False) dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False, collate_fn=lambda x: ([s[0] for s in x], [s[1] for s in x])) translate = lambda x: translate_batch(model, converter, [x])[1] bleu1 = bleu2 = bleu3 = bleu4 = 0 dataloader = iter(dataloader) batches = 2 translate_result = 'Top 50 Results:\n\n' with torch.no_grad(), tqdm.tqdm(range(batches)) as t: for _ in t: src, tgt = next(dataloader) result, s = translate_batch(model, converter, src, tgt)
def do_evaluation(user_config, input_file_path, target_file_path, pred_file_path):
    """Evaluate a trained seq2seq Transformer with sacreBLEU.

    Loads sub-word tokenizers, builds a dummy loader (needed so the model
    graph is traced before checkpoint weights are loaded), restores weights,
    and writes translations/scores via ``sacrebleu_metric``.

    Args:
        user_config: dict-like configuration (batch size, paths, model dims).
        input_file_path: path to source-language evaluation sentences.
        target_file_path: path to reference translations.
        pred_file_path: path where generated translations are written.
    """
    inp_language = user_config["inp_language"]
    target_language = user_config["target_language"]

    print("\n****Evaluating model from {} to {}****\n".format(
        inp_language, target_language))

    print("****Loading Sub-Word Tokenizers****")
    # load pre-trained tokenizer
    tokenizer_inp, tokenizer_tar = utils.load_tokenizers(
        inp_language, target_language, user_config)

    print("****Initializing DataLoader****")
    # dummy data loader. required for loading checkpoint
    dummy_dataloader = DataLoader(
        user_config["transformer_batch_size"],
        user_config["dummy_data_path_{}".format(inp_language)],
        None, tokenizer_inp, tokenizer_tar, inp_language, target_language,
        False)
    dummy_dataset = dummy_dataloader.get_data_loader()

    # data loader over the real evaluation set
    test_dataloader = DataLoader(user_config["transformer_batch_size"],
                                 input_file_path, target_file_path,
                                 tokenizer_inp, tokenizer_tar, inp_language,
                                 target_language, False)
    test_dataset = test_dataloader.get_data_loader()

    input_vocab_size = tokenizer_inp.vocab_size
    target_vocab_size = tokenizer_tar.vocab_size

    # Optional pre-trained embedding matrices (numpy .npy files).
    use_pretrained_emb = user_config["use_pretrained_emb"]
    if use_pretrained_emb:
        pretrained_weights_inp = np.load(
            user_config["pretrained_emb_path_{}".format(inp_language)])
        pretrained_weights_tar = np.load(
            user_config["pretrained_emb_path_{}".format(target_language)])
    else:
        pretrained_weights_inp = None
        pretrained_weights_tar = None

    transformer_model = Transformer(
        user_config["transformer_num_layers"],
        user_config["transformer_model_dimensions"],
        user_config["transformer_num_heads"],
        user_config["transformer_dff"],
        input_vocab_size, target_vocab_size,
        en_input=input_vocab_size,
        fr_target=target_vocab_size,
        rate=user_config["transformer_dropout_rate"],
        weights_inp=pretrained_weights_inp,
        weights_tar=pretrained_weights_tar)

    # First pass on the dummy dataset builds the model variables so that
    # load_weights below can restore into them.
    sacrebleu_metric(transformer_model, pred_file_path, None, tokenizer_tar,
                     dummy_dataset, tokenizer_tar.MAX_LENGTH)

    print("****Loading Model****")
    # load model
    model_path = user_config["model_file"]
    transformer_model.load_weights(model_path)

    print("****Generating Translations****")
    sacrebleu_metric(transformer_model, pred_file_path, target_file_path,
                     tokenizer_tar, test_dataset, tokenizer_tar.MAX_LENGTH)
n_pixels = 50 total_pixels = int(n_pixels * n_pixels) pixel_scale = 0.1 x_grid_in_radians, y_grid_in_radians = grid_2d_in_radians( n_pixels=n_pixels, pixel_scale=pixel_scale ) grid_1d_in_radians = np.array([ np.ndarray.flatten(y_grid_in_radians), np.ndarray.flatten(x_grid_in_radians) ]).T # plot_grid(x_grid=x_grid_in_radians, y_grid=y_grid_in_radians) # exit() transformer = Transformer( uv_wavelengths=uv_wavelengths, grid=grid_1d_in_radians, preload_transform=True ) #print(transformer.cube_shape);exit() theta = [ int(n_pixels / 2.0), int(n_pixels / 2.0), transformer.n_channels / 2.0, 0.25, 0.75 / pixel_scale, 50.0, 65.0, 0.2 / pixel_scale, 300.0, 50.0
def do_train(args):
    """Train a Transformer translation model with Paddle's high-level API.

    Builds input/label specs, data loaders, the model with a Noam-decay Adam
    optimizer, optionally restores a checkpoint or pretrained weights, then
    runs ``model.fit``.

    Args:
        args: parsed command-line namespace carrying model hyper-parameters,
            vocabulary sizes, and checkpoint paths.
    """
    device = paddle.set_device("gpu" if args.use_cuda else "cpu")
    fluid.enable_dygraph(device) if args.eager_run else None

    # set seed for CE (continuous-evaluation reproducibility)
    # NOTE(review): eval() on a CLI argument is risky if args can come from
    # untrusted input — consider int()/ast.literal_eval instead.
    random_seed = eval(str(args.random_seed))
    if random_seed is not None:
        fluid.default_main_program().random_seed = random_seed
        fluid.default_startup_program().random_seed = random_seed

    # define inputs: word ids, position ids, and attention-bias tensors for
    # the source/target sides (shapes are dynamic along batch and seq dims).
    inputs = [
        Input(
            [None, None], "int64", name="src_word"),
        Input(
            [None, None], "int64", name="src_pos"),
        Input(
            [None, args.n_head, None, None],
            "float32",
            name="src_slf_attn_bias"),
        Input(
            [None, None], "int64", name="trg_word"),
        Input(
            [None, None], "int64", name="trg_pos"),
        Input(
            [None, args.n_head, None, None],
            "float32",
            name="trg_slf_attn_bias"),
        Input(
            [None, args.n_head, None, None],
            "float32",
            name="trg_src_attn_bias"),
    ]
    labels = [
        Input(
            [None, 1], "int64", name="label"),
        Input(
            [None, 1], "float32", name="weight"),
    ]

    # def dataloader
    (train_loader, train_steps_fn), (
        eval_loader, eval_steps_fn) = create_data_loader(args, device)

    # define model
    model = paddle.Model(
        Transformer(args.src_vocab_size, args.trg_vocab_size,
                    args.max_length + 1, args.n_layer, args.n_head,
                    args.d_key, args.d_value, args.d_model, args.d_inner_hid,
                    args.prepostprocess_dropout, args.attention_dropout,
                    args.relu_dropout, args.preprocess_cmd,
                    args.postprocess_cmd, args.weight_sharing, args.bos_idx,
                    args.eos_idx), inputs, labels)

    # Adam with the "Noam" warmup/decay schedule from the Transformer paper.
    model.prepare(
        fluid.optimizer.Adam(
            learning_rate=fluid.layers.noam_decay(
                args.d_model,
                args.warmup_steps,
                learning_rate=args.learning_rate),
            beta1=args.beta1,
            beta2=args.beta2,
            epsilon=float(args.eps),
            parameter_list=model.parameters()),
        CrossEntropyCriterion(args.label_smooth_eps))

    ## init from some checkpoint, to resume the previous training
    if args.init_from_checkpoint:
        model.load(args.init_from_checkpoint)
    ## init from some pretrain models, to better solve the current task
    if args.init_from_pretrain_model:
        model.load(args.init_from_pretrain_model, reset_optimizer=True)

    # model train
    model.fit(train_data=train_loader,
              eval_data=eval_loader,
              epochs=args.epoch,
              eval_freq=1,
              save_freq=1,
              save_dir=args.save_model,
              callbacks=[
                  TrainCallback(
                      args,
                      train_steps_fn=train_steps_fn,
                      eval_steps_fn=eval_steps_fn)
              ])
def train(args):
    """Distill two pretrained style-transfer Transformers into one.

    Loads two frozen teacher models (``args.model1``/``args.model2``), then
    trains a fresh Transformer so its VGG16 feature activations match both
    teachers' outputs (perceptual MSE). Saves checkpoints, a loss plot,
    the final model, a pickled loss history, and a parameter log.

    Args:
        args: namespace with data path, image size, batch size, lr, epochs,
            logging/checkpoint intervals and output directories.
    """
    print("Start Time:\t{}".format(time.ctime()))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Teacher models (weights loaded from disk, used only for targets).
    model1 = Transformer()
    model2 = Transformer()
    state_dict1 = torch.load(args.model1)
    state_dict2 = torch.load(args.model2)
    model1.load_state_dict(state_dict1)
    model2.load_state_dict(state_dict2)
    model1.to(device)
    model2.to(device)
    vgg = VGG16().to(device)

    # Images are scaled to [0, 255] (mul(255)) — presumably what the
    # VGG/normalize pipeline expects; confirm against normalize().
    train_dataset = datasets.ImageFolder(
        args.datapath,
        transforms.Compose([
            transforms.Resize(args.image_size),
            transforms.CenterCrop(args.image_size),
            transforms.ToTensor(),
            transforms.Lambda(lambda x: x.mul(255))
        ]))
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size)

    # Student model being trained.
    transformer = Transformer(norm='instance', padding='reflect').to(device)
    optimizer = Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()

    loss = []  # running log: [batch, loss1, loss2, total] rows
    run_time = time.strftime("%d-%H-%M-%S")
    for epoch_num in range(args.epochs):
        transformer.train()
        agg_one_loss = 0.0
        agg_two_loss = 0.0
        count = 0
        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            optimizer.zero_grad()

            content = x.to(device)
            y_hat = transformer(content)
            y_model1 = model1(content)
            y_model2 = model2(content)

            features_yh = vgg(normalize(y_hat))
            features_y1 = vgg(normalize(y_model1))
            features_y2 = vgg(normalize(y_model2))

            # Do this but with losses from the output of the VGG blocks
            # one_loss = mse_loss(y_hat, y_model1)
            # two_loss = mse_loss(y_hat, y_model2)
            # NOTE(review): sum(np.array([...])) over 0-d torch tensors —
            # plain sum([...]) would keep this purely in torch; verify the
            # np.array round-trip preserves the autograd graph as intended.
            one_loss = sum(
                np.array([
                    mse_loss(feat_yh, feat_y1) for feat_yh, feat_y1 in zip(
                        features_yh.values(), features_y1.values())
                ]))
            two_loss = sum(
                np.array([
                    mse_loss(feat_yh, feat_y2) for feat_yh, feat_y2 in zip(
                        features_yh.values(), features_y2.values())
                ]))

            total_loss = one_loss + two_loss
            total_loss.backward()
            optimizer.step()

            agg_one_loss += one_loss.item()
            agg_two_loss += two_loss.item()

            if (batch_id + 1) % args.log_interval == 0:
                mesg = "[{}/{}]\tTotal: {:.2f}\tModel 1: {:.2f}\tModel 2: {:.2f}".format(
                    count, len(train_dataset),
                    (agg_one_loss + agg_two_loss) / (batch_id + 1),
                    agg_one_loss / (batch_id + 1),
                    agg_two_loss / (batch_id + 1),
                )
                print(mesg)
                loss.append([
                    batch_id + 1, agg_one_loss / (batch_id + 1),
                    agg_two_loss / (batch_id + 1),
                    (agg_one_loss + agg_two_loss) / (batch_id + 1)
                ])

            if args.checkpoint_dir is not None and (
                    batch_id + 1) % args.checkpoint_interval == 0:
                # Move to CPU/eval for a deterministic checkpoint, then back.
                transformer.eval().cpu()
                ckpt_model_filename = "ckpt_epoch_" + str(
                    epoch_num + 1) + "_batch_id_" + str(batch_id + 1) + ".pth"
                ckpt_model_path = os.path.join(args.checkpoint_dir,
                                               ckpt_model_filename)
                torch.save(transformer.state_dict(), ckpt_model_path)
                transformer.to(device).train()

        # Refresh the loss plot at the end of each epoch (assumed per-epoch
        # placement — confirm against original indentation).
        save_loss_plot(
            np.array(loss),
            args.log_dir + '/train_loss{}.jpg'.format(run_time))

    # save model and parameter log
    transformer.eval().cpu()
    if args.savename is None:
        save_model_filename = "epoch_" + str(args.epochs) + "_" + str(
            time.strftime("%d-%H-%M-%S")) + ".model"
    else:
        save_model_filename = args.savename
    save_model_path = os.path.join(args.save_dir, save_model_filename)
    torch.save(transformer.state_dict(), save_model_path)

    # save loss in pickle file
    with open('{}/loss{}'.format(args.log_dir, run_time), 'wb') as fp:
        pickle.dump(loss, fp)

    # Human-readable record of the hyper-parameters used for this run.
    with open('{}/param_log{}.txt'.format(args.log_dir, run_time), 'w') as f:
        f.write("Epochs: {}\n".format(args.epochs))
        f.write("Batch Size: {}\n".format(args.batch_size))
        f.write("Dataset: {}\n".format(args.datapath))
        f.write("Learning Rate: {}\n".format(args.lr))
        f.write("Model 1: {}\n".format(args.model1))
        f.write("Model 2: {}\n".format(args.model2))

    print("\nDone, trained model saved at", save_model_path)
a[j] = 0 if j == j1: j1 -= 1 else: if j == j0: j0 += 1 p = 0.0 current = 0 i = n while current < x: i -= 1 current += 1 p += a[i] if math.fabs(p - 1) < 1e-10: p = 1 return p sx = [int(x) for x in open("sequence.txt")] if (max(sx) > 1): t = Transformer(sx) s = t.toUniform(0, 1) chi(s) serial(s) gap(s) poker(s) permutation(s) monotonic(s) conflict(s) nb = input()
def main(args): # Construct Solver # data tr_dataset = AudioDataset(args.train_json, args.batch_size, args.maxlen_in, args.maxlen_out, batch_frames=args.batch_frames) cv_dataset = AudioDataset(args.valid_json, args.batch_size, args.maxlen_in, args.maxlen_out, batch_frames=args.batch_frames) tr_loader = AudioDataLoader(tr_dataset, batch_size=1, num_workers=args.num_workers, shuffle=args.shuffle, LFR_m=args.LFR_m, LFR_n=args.LFR_n) cv_loader = AudioDataLoader(cv_dataset, batch_size=1, num_workers=args.num_workers, LFR_m=args.LFR_m, LFR_n=args.LFR_n) # load dictionary and generate char_list, sos_id, eos_id char_list, sos_id, eos_id = process_dict(args.dict) vocab_size = len(char_list) data = {'tr_loader': tr_loader, 'cv_loader': cv_loader} # model encoder = Encoder(args.d_input * args.LFR_m, args.n_layers_enc, args.n_head, args.d_k, args.d_v, args.d_model, args.d_inner, dropout=args.dropout, pe_maxlen=args.pe_maxlen) decoder = Decoder( sos_id, eos_id, vocab_size, args.d_word_vec, args.n_layers_dec, args.n_head, args.d_k, args.d_v, args.d_model, args.d_inner, dropout=args.dropout, tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing, pe_maxlen=args.pe_maxlen) model = Transformer(encoder, decoder) print(model) device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') #model.cuda() model.to(device) # optimizer optimizier = TransformerOptimizer( torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09), args.k, args.d_model, args.warmup_steps) # solver solver = Solver(data, model, optimizier, args) solver.train()
from transformer import Transformer import torch transformer_model = Transformer(nhead=4, num_encoder_layers=2) src = torch.rand((10, 32, 512)) # 10 is the number of words in the sentence, 32 is the batch size and 512 is the dimensionality of a word?? tgt = torch.rand((20, 32, 512)) out, loss = transformer_model(src, tgt) print(out.shape)
collate_fn=generateBatch) testIter = DataLoader(testData, batch_size=BATCH_SIZE, shuffle=True, collate_fn=generateBatch) ### BUILD MODEL device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model = Transformer( embeddingSize=256, srcVocabSize=len(sourceVocab), trgVocabSize=len(targetVocab), srcPadIdx=PAD_IDX, numHeads=8, numEncoderLayers=3, numDecoderLayers=3, forwardExpansion=4, dropout=0.2, maxLen=350, device=device, ).to(device) optimizer = optim.Adam(model.parameters(), lr=0.0003) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=10, verbose=True) criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX) ### TRAIN AND EVALUATE
def main(): train_data = SentenceDataset(args.train_file, encoding_type=args.encoding_type, filter_threshold=args.filter_threshold) val_data = SentenceDataset(args.val_file, encoding_type=args.encoding_type, filter_threshold=args.filter_threshold) train_loader = torch.utils.data.DataLoader(train_data, args.batch_size, shuffle=True) val_loader = torch.utils.data.DataLoader(val_data, args.batch_size) print(len(train_loader)) input_dim = len(train_data.vocab.source_vocab) output_dim = len(train_data.vocab.target_vocab) static = args.embedding_type == 'static' device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') enc_embedding = Embeddings(input_dim, args.hidden_dim, args.max_len, device, static) encoder_layer = EncoderLayer(args.hidden_dim, args.num_enc_heads, args.inner_dim, args.dropout) encoder = Encoder(enc_embedding, encoder_layer, args.num_enc_layers, args.dropout) dec_embedding = Embeddings(input_dim, args.hidden_dim, args.max_len, device, static) decoder_layer = DecoderLayer(args.hidden_dim, args.num_dec_heads, args.inner_dim, args.dropout) decoder = Decoder(output_dim, args.hidden_dim, dec_embedding, decoder_layer, args.num_dec_layers, args.dropout) pad_id = train_data.vocab.source_vocab['<pad>'] model = Transformer(encoder, decoder, pad_id, device) print('Transformer has {:,} trainable parameters'.format( count_parames(model))) if args.load_model is not None: model.load(args.load_model) else: model.apply(init_weights) if args.mode == 'test': inferencer = Inferencer(model, train_data.vocab, device) greedy_out = inferencer.infer_greedy( 'helo world, I m testin a typo corector') print(greedy_out) elif args.mode == 'train': optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) loss_function = nn.NLLLoss(ignore_index=pad_id) print('Started training...') train(model, train_loader, val_loader, optimizer, loss_function, device) else: raise ValueError('Mode not recognized')
def __create_transformer_block(self, num_transformers, dropout=0.3): transformers = [] for i in range(num_transformers): transformers.append(Transformer(dim_embedding=self.dim_embedding, num_heads=self.num_heads, dropout=dropout)) return nn.Sequential(*transformers)
print('Steps {} Loss {:.4f}'.format(s, train_loss.result())) self.train_step(self.train_iter.next()) print('Steps {} Loss {:.4f}'.format(steps, train_loss.result())) self.model.save() print('model saved') print('training finished') if __name__ == "__main__": #train_data = VQA(r'D:\documents\coding\Data\coco\v2_mscoco_train2014_annotations.json', #r'D:\documents\coding\Data\coco\v2_OpenEnded_mscoco_train2014_questions.json', #r'D:\documents\coding\Data\coco\train2014\COCO_train2014_{0}.jpg', #r'D:\documents\coding\Data\coco\v2_mscoco_train2014_complementary_pairs.json') train_data = VQA( r'D:\lgy\Document\Python\Data\coco\v2_mscoco_train2014_annotations.json', r'D:\lgy\Document\Python\Data\coco\v2_OpenEnded_mscoco_train2014_questions.json', r'D:\lgy\Document\Python\Data\coco\train2014\COCO_train2014_{0}.jpg') train_iter = VQAIter(train_data, train_data.getQuesIds(ansTypes=['other', 'yes/no']), hp.batch_size, hp.num_chunks) max_qst_len = hp.max_qst_len max_ans_len = hp.max_ans_len model = Transformer(hp.num_layers, hp.d_model, hp.num_heads, hp.dff, max_qst_len + 3, hp.dropout_rate) trainer = Trainer(train_iter, model, 16, max_qst_len, max_ans_len) trainer.train(hp.steps, hp.steps_per_save, hp.steps_per_chunk, hp.steps_per_report)
def evaluate_transformer():
    """Translate the WMT14 de→en test split with a checkpointed Transformer
    using beam search, score each sentence with BLEU, and dump a CSV of
    inputs/targets/translations/scores.

    Relies on module-level config: output_path, tag_new_tok, DICT_SIZE,
    num_layers, d_model, num_heads, dff, dropout_rate, checkpoint_path,
    data_path, beam_width, MAX_LENGTH, alpha, experiment_name.
    """
    tokenizer_en = tfds.features.text.SubwordTextEncoder.load_from_file(
        os.path.join(output_path,
                     tag_new_tok + "tokenizer_en_" + str(DICT_SIZE)))
    tokenizer_de = tfds.features.text.SubwordTextEncoder.load_from_file(
        os.path.join(output_path,
                     tag_new_tok + "tokenizer_de_" + str(DICT_SIZE)))
    # +2 reserves ids for the start and end tokens appended below.
    input_vocab_size = tokenizer_de.vocab_size + 2
    target_vocab_size = tokenizer_en.vocab_size + 2

    transformer1 = Transformer(num_layers, d_model, num_heads, dff,
                               input_vocab_size, target_vocab_size,
                               pe_input=input_vocab_size,
                               pe_target=target_vocab_size,
                               rate=dropout_rate)

    # Restore weights only; expect_partial() silences warnings about
    # optimizer slots absent at inference time.
    ckpt = tf.train.Checkpoint(transformer1=transformer1)
    ckpt.restore(tf.train.latest_checkpoint(checkpoint_path)).expect_partial()
    print('Latest checkpoint restored!!')

    examples, metadata = tfds.load('wmt14_translate/de-en',
                                   data_dir=data_path,
                                   with_info=True, as_supervised=True)
    test_examples = examples['test']

    def predict(inp_sentence):
        # Beam-search decode one German sentence into English token ids.
        start_token = [tokenizer_de.vocab_size]
        end_token = [tokenizer_de.vocab_size + 1]

        # inp sentence is german, hence adding the start and end token
        inp_sentence = start_token + tokenizer_de.encode(inp_sentence) + end_token
        encoder_input = tf.expand_dims(inp_sentence, 0)

        # as the target is english, the first word to the transformer should be the
        # english start token.
        decoder_input = [tokenizer_en.vocab_size]
        output = tf.expand_dims(decoder_input, 0)

        # predictions.shape == (batch_size, seq_len, vocab_size)
        def symbols_to_logits(output):
            # Tile the encoder input across the beam and take the logits of
            # the last decoded position for each beam candidate.
            batched_input = tf.tile(encoder_input, [beam_width, 1])
            enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
                batched_input, output)
            predictions, attention_weights = transformer1(
                batched_input, output, False, enc_padding_mask,
                combined_mask, dec_padding_mask)
            predictions = predictions[:, -1, :]
            return predictions

        finished_seq, finished_scores, states = beam_search(
            symbols_to_logits, output, beam_width, MAX_LENGTH,
            target_vocab_size, alpha, states=None,
            eos_id=tokenizer_en.vocab_size + 1, stop_early=True,
            use_tpu=False, use_top_k_with_unique=True)

        # Best-scoring beam of the single input sentence.
        return finished_seq[0, 0, :]

    def translate(sentence):
        result = predict(sentence)
        # Drop start/end markers (ids >= vocab_size) before decoding.
        predicted_sentence = tokenizer_en.decode(
            [i for i in result if i < tokenizer_en.vocab_size])

        print('Input: {}'.format(sentence))
        print('Predicted translation: {}'.format(predicted_sentence))
        return predicted_sentence

    translations = []
    inputs = []
    targets = []
    BLEUs = []
    for sentence in test_examples:
        inp = sentence[0].numpy().decode('utf-8')
        target = sentence[1].numpy().decode('utf-8')
        translation = translate(inp)
        BLEU = nltk.translate.bleu_score.sentence_bleu(
            [nltk.word_tokenize(target)], nltk.word_tokenize(translation))
        translations.append(translation)
        inputs.append(inp)
        BLEUs.append(BLEU)
        # Running average printed after every sentence.
        print('Average BLEU score: ', 100 * np.mean(BLEUs))
        targets.append(target)

    d = {'input': inputs, 'target': targets, 'translation': translations,
         'BLEU': BLEUs}
    df = pd.DataFrame.from_dict(d)
    df.to_csv(os.path.join(output_path,
                           'results_' + experiment_name + '.csv'))
    print('Average BLEU score: ', 100 * np.mean(BLEUs))
def transform(self, data): transformer = Transformer() transformer.transform_categorial(data, self.categorial_name, self.C) return transformer.data
from prefect import Flow, task from extracter import Extracter from transformer import Transformer from loader import Loader with Flow("ETL") as flow: url = 'https://www.marketbeat.com/stocks/NASDAQ/MSFT/price-target/?MostRecent=0' e = Extracter(url).extract() df = Transformer().transform(text=e) l = Loader().load(df) flow.run()
def train(fv, model_name, criterion, balance=False, batchsize=64, size=0):
    """Train (or resume) a Transformer classifier on feature vectors.

    Loads train/val/test splits via the fv- or matlab-specific loader,
    resumes from ``modeldir/<model_name>/model.pth`` when present, logs to
    TensorBoard, validates every epoch, and saves the model whenever the
    validation loss improves or the validation F1 improves.

    Args:
        fv: feature-vector type; "matlab" selects matloader, else fvloader.
        model_name: subdirectory name under ./modeldir for logs/checkpoints.
        criterion: element-wise loss (reduced here across batch then labels).
        balance: whether to class-balance the training data.
        batchsize: minibatch size.
        size: dataset size selector passed through to the loaders.
    """
    if fv == "matlab":
        dloader = matloader
    else:
        dloader = fvloader
    train_data = dloader.load_train_data(size=size, balance=balance, fv=fv)
    val_data = dloader.load_val_data(size=size, fv=fv)
    test_data = dloader.load_test_data(size=size, fv=fv)

    # model_name = "transformer_%s_size%d_bce" % (fv, size)
    model_dir = os.path.join("./modeldir/%s" % model_name)
    model_pth = os.path.join(model_dir, "model.pth")
    writer = tensorboardX.SummaryWriter(model_dir)
    # Resume the whole pickled model if a checkpoint exists.
    if os.path.exists(model_pth):
        print("------load model--------")
        model = torch.load(model_pth)
    else:
        # model = Transformer(fv, NUM_HEADS=4, NUM_LAYERS=3).cuda()
        model = Transformer(fv).cuda()
        model = nn.DataParallel(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001,
                                 weight_decay=0.001)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    #     optimizer, factor=0.5,
    #     patience=30, min_lr=1e-4)

    epochs = 2000
    step = 1
    val_step = 1
    max_f1 = 0.0
    for e in range(epochs):
        model.train()
        print("------epoch--------", e)
        st = time.time()
        train_shuffle = fvloader.shuffle(train_data)
        for item in fvloader.batch_fv(train_shuffle, batch=batchsize):
            # for name, param in model.named_parameters():
            #     writer.add_histogram(
            #         name, param.clone().cpu().data.numpy(), step)
            #     writer.add_histogram(
            #         "grad/"+name, param.grad.clone().cpu().data.numpy(), step)
            model.zero_grad()
            genes, nimgs, labels, timesteps = item
            inputs = torch.from_numpy(nimgs).type(torch.cuda.FloatTensor)
            gt = torch.from_numpy(labels).type(torch.cuda.FloatTensor)
            pd = model(inputs)

            # loss = criterion(pd, gt)
            # Reduce per-element loss over the batch, then over labels.
            all_loss = criterion(pd, gt)
            label_loss = torch.mean(all_loss, dim=0)
            loss = torch.mean(label_loss)
            # for i in range(6):
            #     writer.add_scalar("train sl_%d_loss" % i,
            #                       label_loss[i].item(), step)

            # Threshold predictions and log training metrics.
            train_pd = torch_util.threshold_tensor_batch(pd)
            np_pd = train_pd.data.cpu().numpy()
            torch_util.torch_metrics(labels, np_pd, writer, step,
                                     mode="train")
            writer.add_scalar("train loss", loss, step)
            loss.backward()
            optimizer.step()
            step += 1

        et = time.time()
        writer.add_scalar("train time", et - st, e)
        for param_group in optimizer.param_groups:
            writer.add_scalar("lr", param_group['lr'], e)

        # run_origin_train(model, imbtrain_data, writer, e, criterion)
        if e % 1 == 0:
            val_loss, val_f1 = run_val(model, dloader, val_data, writer,
                                       val_step, criterion)
            # scheduler.step(val_loss)
            val_step += 1
            if e == 0:
                start_loss = val_loss
                min_loss = start_loss
            # if val_loss > 2 * min_loss:
            #     print("early stopping at %d" % e)
            #     break

            # if e % 50 == 0:
            #     pt = os.path.join(model_dir, "%d.pt" % e)
            #     torch.save(model.state_dict(), pt)
            #     result = os.path.join(model_dir, "result_epoch%d.txt" % e)
            #     run_test(model, test_data, result)

            # Save whenever either validation metric improves.
            if min_loss > val_loss or max_f1 < val_f1:
                if min_loss > val_loss:
                    print("---------save best----------", "loss", val_loss)
                    min_loss = val_loss
                if max_f1 < val_f1:
                    print("---------save best----------", "f1", val_f1)
                    max_f1 = val_f1
                torch.save(model, model_pth)
                result = os.path.join(model_dir, "result_epoch%d.txt" % e)
                run_test(model, dloader, test_data, result)
import os, sys import dataloader as dd from keras.optimizers import * from keras.callbacks import * itokens, otokens = dd.MakeS2SDict('data/pinyin.corpus.txt', dict_file='data/pinyin_word.txt') print('seq 1 words:', itokens.num()) print('seq 2 words:', otokens.num()) from transformer import Transformer, LRSchedulerPerStep d_model = 256 s2s = Transformer(itokens, otokens, len_limit=500, d_model=d_model, d_inner_hid=1024, \ n_head=4, layers=3, dropout=0.1) mfile = 'models/pinyin.model.h5' lr_scheduler = LRSchedulerPerStep(d_model, 4000) model_saver = ModelCheckpoint(mfile, monitor='ppl', save_best_only=True, save_weights_only=True) #s2s.model.summary() opt = Adam(0.001, 0.9, 0.98, epsilon=1e-9) s2s.compile(opt) try: s2s.model.load_weights(mfile) except:
# embedding size max_length=100, hidden_units=512, dropout_rate=0.1, lr=0.0001, is_training=True) return params arg = create_hparams() arg.input_vocab_size = len(en_vocab) arg.label_vocab_size = len(zh_vocab) arg.is_training = False arg.dropout_rate = 0. g = Transformer(arg) saver = tf.train.Saver() de_zh_vocab = {v: k for k, v in zh_vocab.items()} with tf.Session() as sess: saver.restore(sess, 'tmp/model.ckpt') for i in range(100): line = encoder_inputs[i * 1000] x = np.array(line) x = x.reshape(1, -1) de_inp = [[zh_vocab['<GO>']]] while True: y = np.array(de_inp) preds = sess.run(g.preds, {g.x: x, g.de_inp: y})
def _setup(self, config):
    """Ray/Tune-style setup hook: load the dataset, decide a column
    ordering, build either a Transformer or a MADE density model, and
    create the optimizer and training dataset.

    Every key in ``config`` is copied onto ``self`` verbatim, so later
    attribute reads (self.dataset, self.order_seed, ...) come from config.
    """
    print('NaruTrainer config:', config)
    os.chdir(config["cwd"])
    for k, v in config.items():
        setattr(self, k, v)
    self.epoch = 0

    # Config may pass a thunk to defer expensive corpus loading.
    if callable(self.text_eval_corpus):
        self.text_eval_corpus = self.text_eval_corpus()

    # Try to make all the runs the same, except for input orderings.
    torch.manual_seed(0)
    np.random.seed(0)

    assert self.dataset in [
        'dmv', 'dmv-full', 'census', 'synthetic', 'kdd', 'kdd-full', 'url',
        'url-tiny', 'dryad-urls', 'dryad-urls-small'
    ]
    # Data-level shuffling reuses the order seed; otherwise loaders get None.
    if self.shuffle_at_data_level:
        data_order_seed = self.order_seed
    else:
        data_order_seed = None
    if self.dataset == 'dmv-full':
        table = datasets.LoadDmv(full=True, order_seed=data_order_seed)
    elif self.dataset == 'dmv':
        table = datasets.LoadDmv(order_seed=data_order_seed)
    elif self.dataset == 'synthetic':
        table = datasets.LoadSynthetic(order_seed=data_order_seed)
    elif self.dataset == 'census':
        table = datasets.LoadCensus(order_seed=data_order_seed)
    elif self.dataset == 'kdd':
        table = datasets.LoadKDD(order_seed=data_order_seed)
    elif self.dataset == 'kdd-full':
        table = datasets.LoadKDD(full=True, order_seed=data_order_seed)
    elif self.dataset == 'url-tiny':
        table = datasets.LoadURLTiny()
    elif self.dataset == 'dryad-urls':
        table = datasets.LoadDryadURLs()
    elif self.dataset == 'dryad-urls-small':
        table = datasets.LoadDryadURLs(small=True)
    self.table = table
    self.oracle = Oracle(
        table, cache_dir=os.path.expanduser("~/oracle_cache"))
    try:
        # Entropy of the full-table distribution, in bits.
        self.table_bits = Entropy(
            self.table,
            self.table.data.fillna(value=0).groupby(
                [c.name for c in table.columns]).size(), [2])[0]
    except Exception as e:
        print("Error computing table bits", e)
        self.table_bits = 0
        # TODO(ekl) why does dmv-full crash on ec2

    # Decide the (single) fixed column ordering, if any.
    fixed_ordering = None
    if self.special_orders <= 1:
        fixed_ordering = list(range(len(table.columns)))

    if self.entropy_order:
        assert self.num_orderings == 1
        # Order columns by per-column entropy (ascending or descending).
        res = []
        for i, c in enumerate(table.columns):
            bits = Entropy(c.name, table.data.groupby(c.name).size(), [2])
            res.append((bits[0], i))
        s = sorted(res, key=lambda b: b[0], reverse=self.reverse_entropy)
        fixed_ordering = [t[1] for t in s]
        print('Using fixed ordering:', '_'.join(map(str, fixed_ordering)))
        print(s)

    if self.order is not None:
        print('Using passed-in order:', self.order)
        fixed_ordering = self.order

    # Model-level order shuffling ("reverse" is a special seed value).
    if self.order_seed is not None and not self.shuffle_at_data_level:
        if self.order_seed == "reverse":
            fixed_ordering = fixed_ordering[::-1]
        else:
            rng = np.random.RandomState(self.order_seed)
            rng.shuffle(fixed_ordering)
        print('Using generated order:', fixed_ordering)

    print(table.data.info())
    self.fixed_ordering = fixed_ordering

    table_train = table

    # Optional extra orderings beyond the curated _SPECIAL_ORDERS list are
    # generated from deterministic per-index seeds.
    if self.special_orders > 0:
        special_orders = _SPECIAL_ORDERS[self.dataset][:self.special_orders]
        k = len(special_orders)
        seed = self.special_order_seed * 10000
        for i in range(k, self.special_orders):
            special_orders.append(
                np.random.RandomState(seed + i - k + 1).permutation(
                    np.arange(len(table.columns))))
        print('Special orders', np.array(special_orders))
    else:
        special_orders = []

    if self.use_transformer:
        args = {
            "num_blocks": 4,
            "d_model": 64,
            "d_ff": 256,
            "num_heads": 4,
            "nin": len(table.columns),
            "input_bins": [c.DistributionSize() for c in table.columns],
            "use_positional_embs": True,
            "activation": "gelu",
            "fixed_ordering": fixed_ordering,
            "dropout": False,
            "seed": self.seed,
            "first_query_shared": False,
            "prefix_dropout": self.prefix_dropout,
            "mask_scheme": 0,
            # XXX only works for default order?
        }
        args.update(self.transformer_args)
        model = Transformer(**args).to(get_device())
    else:
        model = MakeMade(
            scale=self.fc_hiddens,
            cols_to_train=table.columns,
            seed=self.seed,
            dataset=self.dataset,
            fixed_ordering=fixed_ordering,
            special_orders=special_orders,
            layers=self.layers,
            residual=self.residual,
            embed_size=self.embed_size,
            dropout=self.dropout,
            per_row_dropout=self.per_row_dropout,
            prefix_dropout=self.prefix_dropout,
            fixed_dropout_ratio=self.fixed_dropout_ratio,
            input_no_emb_if_leq=self.input_no_emb_if_leq,
            disable_learnable_unk=self.disable_learnable_unk,
            embs_tied=self.embs_tied)

    child = None  # no auxiliary child model in this configuration
    print(model.nin, model.nout, model.input_bins)

    blacklist = None
    mb = ReportModel(model, blacklist=blacklist)
    self.mb = mb

    # Transformers keep their library initialization; MADE gets weight_init.
    if not isinstance(model, Transformer):
        print('applying weight_init()')
        model.apply(weight_init)

    # Transformer uses the paper's Adam hyper-parameters; MADE uses defaults.
    if isinstance(model, Transformer):
        opt = torch.optim.Adam(
            list(model.parameters()) +
            (list(child.parameters()) if child else []),
            2e-4,
            betas=(0.9, 0.98),
            eps=1e-9,
        )
    else:
        opt = torch.optim.Adam(
            list(model.parameters()) +
            (list(child.parameters()) if child else []), 2e-4)

    self.train_data = TableDataset(table_train)

    self.model = model
    self.opt = opt

    if self.checkpoint_to_load:
        self.model.load_state_dict(torch.load(self.checkpoint_to_load))
class Graph: transformer = Transformer() def __init__(self): return
if config.run_tensorboard: from input_path import train_summary_writer, valid_summary_writer else: train_summary_writer = None valid_summary_writer = None #tokenizer_en = tfds.features.text.SubwordTextEncoder.load_from_file(file_path.subword_vocab_path) train_dataset, val_dataset = create_train_data() train_loss, train_accuracy = get_loss_and_accuracy() validation_loss, validation_accuracy = get_loss_and_accuracy() transformer = Transformer(num_layers=config.num_layers, d_model=config.d_model, num_heads=config.num_heads, dff=config.dff, input_vocab_size=config.input_vocab_size, target_vocab_size=config.target_vocab_size, rate=config.dropout_rate) generator = Generator() # The @tf.function trace-compiles train_step into a TF graph for faster # execution. The function specializes to the precise shape of the argument # tensors. To avoid re-tracing due to the variable sequence lengths or variable # batch sizes (the last batch is smaller), use input_signature to specify # more generic shapes. train_step_signature = [ tf.TensorSpec(shape=(None, None), dtype=tf.int64), tf.TensorSpec(shape=(None, None), dtype=tf.int64), tf.TensorSpec(shape=(None), dtype=tf.int32),
def __init__(self, cfg):
    """Assemble the DETR detector from a config node.

    Builds, in order: backbone, transformer, classification/box heads,
    learned object queries, input projection, positional embedding,
    the loss-weight dict (with auxiliary-decoder copies), the Hungarian
    matcher + SetCriterion, the bbox post-processor, and the image
    normalizer; finally moves the whole module to the configured device.

    Args:
        cfg: hierarchical config read under ``cfg.MODEL`` (DEVICE,
            DETR.*, PIXEL_MEAN/PIXEL_STD).
            # presumably a detectron2-style CfgNode — TODO confirm
    """
    super(DETR, self).__init__()
    self.device = torch.device(cfg.MODEL.DEVICE)

    # Build Backbone
    self.backbone = build_backbone(cfg)

    # Build Transformer
    self.transformer = Transformer(cfg)

    self.aux_loss = not cfg.MODEL.DETR.NO_AUX_LOSS
    self.num_classes = cfg.MODEL.DETR.NUM_CLASSES
    self.num_queries = cfg.MODEL.DETR.NUM_QUERIES
    hidden_dim = self.transformer.d_model

    # Build FFN heads: +1 output class for the "no object" label.
    self.class_embed = nn.Linear(hidden_dim, self.num_classes + 1)
    self.bbox_embed = MLP(hidden_dim, hidden_dim, 4, 3)

    # Build Object Queries: one learned embedding per query slot.
    self.query_embed = nn.Embedding(self.num_queries, hidden_dim)
    backbone_out_shapes = self.backbone.output_shape()["res5"]
    # 1x1 conv projecting the backbone's "res5" channels to hidden_dim.
    self.input_proj = nn.Conv2d(backbone_out_shapes.channels,
                                hidden_dim,
                                kernel_size=1)
    # Positional-embedding class selected by name from the registry dict.
    self.position_embedding = position_embedding[
        cfg.MODEL.DETR.POSITION_EMBEDDING](
            num_pos_feats=hidden_dim // 2,
            temperature=cfg.MODEL.DETR.get("TEMPERATURE", 10000),
            normalize=True if cfg.MODEL.DETR.POSITION_EMBEDDING else False,
            scale=None,
        )

    # Loss weights; CE weight is fixed at 1.0, box losses come from config.
    self.weight_dict = {
        "loss_ce": 1.0,
        "loss_bbox": cfg.MODEL.DETR.BBOX_LOSS_COEFF,
        "loss_giou": cfg.MODEL.DETR.GIOU_LOSS_COEFF,
    }
    if self.aux_loss:
        # Duplicate every loss weight once per intermediate decoder layer
        # (suffix "_<layer>") so auxiliary outputs are supervised too.
        self.aux_weight_dict = {}
        for i in range(cfg.MODEL.DETR.TRANSFORMER.NUM_DEC_LAYERS - 1):
            self.aux_weight_dict.update(
                {k + f"_{i}": v for k, v in self.weight_dict.items()})
        self.weight_dict.update(self.aux_weight_dict)
    losses = ["labels", "boxes", "cardinality"]

    # Bipartite matcher pairing predictions with ground-truth boxes.
    matcher = HungarianMatcher(
        cost_class=cfg.MODEL.DETR.COST_CLASS,
        cost_bbox=cfg.MODEL.DETR.COST_BBOX,
        cost_giou=cfg.MODEL.DETR.COST_GIOU,
    )
    self.criterion = SetCriterion(self.num_classes,
                                  matcher=matcher,
                                  weight_dict=self.weight_dict,
                                  eos_coef=cfg.MODEL.DETR.EOS_COEFF,
                                  losses=losses)
    self.post_processors = {'bbox': PostProcess()}

    # Per-channel image normalization: (x - mean) / std, shapes (3,1,1).
    pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
        3, 1, 1)
    pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
        3, 1, 1)
    self.normalizer = lambda x: (x - pixel_mean) / pixel_std
    self.to(self.device)
# Experiment configuration and logging; paths come from the params object.
params = TransformerParams()
logger = get_logger('validation', params.experiment_dir)
logger.info("Logging to {}".format(params.experiment_dir))

# preprocess data
# Pair encoded questions with answers, shuffle across the full question
# set, batch, and prefetch for pipelined input.
dataset = tf.data.Dataset.from_tensor_slices(
    (questions_encoded, answers_encoded))
input_data = dataset.take(params.num_examples).shuffle(questions_encoded.shape[0]).batch(params.batch_size) \
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
# First num_training_batches batches are the (repeated) training split;
# everything after them is held out for validation.
train_data = input_data.take(params.num_training_batches).repeat(
    params.num_epochs)
valid_data = input_data.skip(params.num_training_batches)

model = Transformer(params)
model.train(params, train_data, valid_data, logger)
# model.inference()

'''
HN NOTE:
For generalizability of training pipeline,
Train steps should be methods of the model and individual train steps should output masked preds + targets
But the training loop should be general
Training loop should be similar to lstm.py current one, contain
- Tensorboard logging
- Validation loss + accuracy if i % n
- Early stopping check
- Outputting samples
- Model checkpointing
'''
look_ahead_mask, dec_padding_mask) else: return self.predict(input_ids, draft_decoder_type=decoder_type, beam_size=beam_size, length_penalty=length_penalty, temperature=temperature, top_p=top_p, top_k=top_k) if config.model == 'transformer': Model = Transformer( num_layers=config.num_layers, d_model=config.d_model, num_heads=config.num_heads, dff=config.dff, input_vocab_size=config.input_vocab_size, target_vocab_size=config.target_vocab_size, add_pointer_generator=config.add_pointer_generator ) elif config.model == 'bertified_transformer': Model = Bertified_transformer( num_layers=config.num_layers, d_model=config.d_model, num_heads=config.num_heads, dff=config.dff, input_vocab_size=config.input_vocab_size, target_vocab_size=config.target_vocab_size, add_pointer_generator=config.add_pointer_generator )
def evaluate_transformer():
    """Back-translate EN->DE with a checkpointed Transformer and score BLEU.

    Loads subword tokenizers and the latest ``transformer2`` checkpoint,
    takes a length-filtered slice of the WMT14 de-en training split that
    lies *after* the portion used for training, translates each English
    sentence to German via beam search, prints a running average
    sentence-BLEU, and writes inputs/targets/translations/BLEU to a CSV.
    Relies on module-level globals (num_layers, d_model, beam_width,
    alpha, output_path, checkpoint_path, experiment_name, ...).
    """
    tokenizer_en = tfds.features.text.SubwordTextEncoder.load_from_file(
        os.path.join(output_path,
                     tag_new_tok + "tokenizer_en_" + str(DICT_SIZE)))
    tokenizer_de = tfds.features.text.SubwordTextEncoder.load_from_file(
        os.path.join(output_path,
                     tag_new_tok + "tokenizer_de_" + str(DICT_SIZE)))
    # +2 reserves ids for the start and end tokens appended below.
    input_vocab_size = tokenizer_en.vocab_size + 2
    target_vocab_size = tokenizer_de.vocab_size + 2

    # using transformer2 as eng-> de
    transformer2 = Transformer(num_layers, d_model, num_heads, dff,
                               input_vocab_size, target_vocab_size,
                               pe_input=input_vocab_size,
                               pe_target=target_vocab_size,
                               rate=dropout_rate)
    ckpt = tf.train.Checkpoint(transformer2=transformer2)
    ckpt.restore(tf.train.latest_checkpoint(checkpoint_path)).expect_partial()
    print('Latest checkpoint restored!!')

    # loading different part of training set for backtrans (before :TRAIN_ON)
    # i.e. take the percent range [TRAIN_ON, TRAIN_ON + train_backtrans_on).
    train_on_end = TRAIN_ON + train_backtrans_on
    split = tfds.Split.TRAIN.subsplit(tfds.percent[TRAIN_ON:train_on_end])
    print('Split is: {}'.format(split))
    examples, metadata = tfds.load('wmt14_translate/de-en',
                                   data_dir=data_path,
                                   with_info=True,
                                   as_supervised=True,
                                   split=split)

    def filter_max_length(x, y, max_length=MAX_LENGTH):
        """Function restricting used sequences x and y to <= max_lenght"""
        return tf.logical_and(
            tf.size(x) <= max_length,
            tf.size(y) <= max_length)

    examples = examples.filter(filter_max_length)
    train_examples4backtrans = examples
    print('type of train_examples4backtrans: {}'.format(
        type(train_examples4backtrans)))
    print('shape of train_examples4backtrans: {}'.format(
        tf.data.experimental.cardinality(train_examples4backtrans)))
    # NOTE(review): this makes a full pass over the dataset just to count
    # it, and the result is never used afterwards — candidate for removal.
    dataset_length = [i for i, _ in enumerate(train_examples4backtrans)][-1] + 1

    def predict(inp_sentence):
        """Beam-search translate one English string; returns a DE id tensor."""
        start_token = [tokenizer_en.vocab_size]
        end_token = [tokenizer_en.vocab_size + 1]

        # inp sentence is ENGLISH, hence adding the start and end token
        inp_sentence = start_token + tokenizer_en.encode(
            inp_sentence) + end_token
        encoder_input = tf.expand_dims(inp_sentence, 0)

        # as the target is GERMAN, the first word to the transformer should be
        # the english start token.
        decoder_input = [tokenizer_de.vocab_size]
        output = tf.expand_dims(decoder_input, 0)

        # predictions.shape == (batch_size, seq_len, vocab_size)
        def symbols_to_logits(output):
            # Tile the single encoder input across the beam, then score the
            # next-token logits for every partial hypothesis.
            batched_input = tf.tile(encoder_input, [beam_width, 1])
            enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
                batched_input, output)
            predictions, attention_weights = transformer2(
                batched_input, output, False, enc_padding_mask,
                combined_mask, dec_padding_mask)
            # Keep only the logits for the last position.
            predictions = predictions[:, -1, :]
            return predictions

        finished_seq, finished_scores, states = beam_search(
            symbols_to_logits,
            output,
            beam_width,
            MAX_LENGTH,
            target_vocab_size,
            alpha,
            states=None,
            eos_id=tokenizer_de.vocab_size + 1,
            stop_early=True,
            use_tpu=False,
            use_top_k_with_unique=True)
        # Best (top-scoring) finished hypothesis of the first batch element.
        return finished_seq[0, 0, :]

    def translate(sentence):
        """Translate, strip special ids, print and return the German text."""
        result = predict(sentence)
        predicted_sentence = tokenizer_de.decode(
            [i for i in result if i < tokenizer_de.vocab_size])
        print('Input: {}'.format(sentence))
        print('Predicted translation: {}'.format(predicted_sentence))
        return predicted_sentence

    translations = []
    inputs = []
    targets = []
    BLEUs = []
    i = 0
    for sentence in train_examples4backtrans:
        # eng-> deu : hence indexes reversed
        # (dataset pairs are (de, en); we translate en -> de).
        inp = sentence[1].numpy().decode('utf-8')
        target = sentence[0].numpy().decode('utf-8')
        translation = translate(inp)
        BLEU = nltk.translate.bleu_score.sentence_bleu(
            [nltk.word_tokenize(target)], nltk.word_tokenize(translation))
        translations.append(translation)
        inputs.append(inp)
        BLEUs.append(BLEU)
        # Running average over everything translated so far.
        print('Average BLEU score: ', 100 * np.mean(BLEUs))
        targets.append(target)
        # i+=1
        # store backtrans every 800 sentences
        # if i % 800 == 0:
        #     d = {'input': inputs, 'target': targets, 'translation': translations, 'BLEU': BLEUs}
        #     df = pd.DataFrame.from_dict(d)
        #     df.to_csv(os.path.join(output_path, 'results_backtrans_' + experiment_name + '_interm_'+str(i)+'.csv'))

    # Persist all results for this experiment in a single CSV.
    d = {
        'input': inputs,
        'target': targets,
        'translation': translations,
        'BLEU': BLEUs
    }
    df = pd.DataFrame.from_dict(d)
    df.to_csv(
        os.path.join(output_path,
                     'results_backtrans_' + experiment_name + '.csv'))
    print('Average BLEU score: ', 100 * np.mean(BLEUs))
lr_scheduler = LRSchedulerPerStep( d_model, 4000) # there is a warning that it is slow, however, it's ok. # lr_scheduler = LRSchedulerPerEpoch(d_model, 4000, Xtrain.shape[0]/64) # this scheduler only update lr per epoch model_saver = ModelCheckpoint(mfile, save_best_only=True, save_weights_only=True) ########################################### if 'sparse' in sys.argv: initParams = initSparseWeights(epsilon, n_head=n_head, d_k=d_k, d_v=d_v, layers=layers) s2s = Transformer(itokens, otokens, len_limit=len_limit, d_model=d_model, d_inner_hid=d_inner_hid, \ n_head=n_head, d_k=d_k, d_v=d_v, layers=layers, dropout=dropout, weightsForSparsity=initParams) s2s.compile(adam) s2s.model.summary() if 'load_existing_model' in sys.argv: s2s.model.summary() try: s2s.model.load_weights(mfile) except: print('\n\nnew model') else: print('*** New model ***') for epoch in range(0, maxepoches): print('epoch #' + str(epoch))
# Tokenize the free-text 'description' column with the configured
# filters/tokenizer.  # assumes df is a pandas DataFrame — TODO confirm
descriptions = df['description'].tolist()
FT = FilteredTokenizer()
Tokens = FT.filter_and_tokenize(descriptions, mode=TOKEN_FILTERS,
                                tokenizer=TOKENIZER,
                                filter_fpath=CUSTOM_FILTER_PATH)

# Load pretrained word vectors and demonstrate a few queries.
WordEmbedding_ = WordEmbedding()
WordEmbedding_.load()
print("====== Examples of things you can do with the embeddings =======")
print(WordEmbedding_.word_vectors.most_similar(positive=['woman', 'king'],
                                               negative=['man']))
print(WordEmbedding_.word_vectors.most_similar("dont"))
print(WordEmbedding_.word_vectors.most_similar("a"))
# Report how many tokens are covered by the embedding vocabulary.
matched_tokens, unmatched_tokens = WordEmbedding_.check_embedding_coverage(
    list_tokens=Tokens, verbose=True)
# Then you will get a file named <embedding file name> + <date time> + unmatched tokens
# this is a file with count distinct unmatched tokens, sorted in descending order
# Then you are able to see these attributes:
print("WordEmbedding_.coverage", WordEmbedding_.coverage)
# print("WordEmbedding_.wordvec_map", WordEmbedding_.wordvec_map)
print("You can get a word vector of the word 'hello' by calling: WordEmbedding_.word_vectors.get_vector('hello')", WordEmbedding_.word_vectors.get_vector('hello'))

T = Transformer(WordEmbedding_.wordvec_map)
# will convert the points numbers (score values) into one-hot vectors of
# categories defined by us (interval)
# You can change the setting in config
y = df['points'].tolist()
X, y = T.fit_transform(Tokens, y,
                       drop_long_sentences=DROP_LONG_SENTENCES,
                       drop_short_sentences=DROP_SHORT_SENTENCES,
                       num2cat_=CONVERT_Y,
                       intervals=Y_CAT_INTERVALS)
print("X.shape, y.shape ", X.shape, y.shape)