#print("bounds")
    #print(upper_confidence_bound(np.asarray([[0.00617284, 0.48765432]])))
    min_val = scipydirect.minimize(neg_upper_confidence_bound,bounds)
    xval = min_val.x

    acc_targets = multi_fid_values['accuracy_targets'] + [0.0]
    out_fid_level = num_fidelities - 1  # defaults to the highest-fidelity function
    for fid_level,(acc,reg) in enumerate(zip(acc_targets,regressors)):
        mean,stdev = reg.predict([min_val.x], return_std=True)
        if stdev*beta > acc:
            out_fid_level = fid_level
            break

    yval = -neg_upper_confidence_bound([xval])
    return xval,yval,out_fid_level

if __name__ == "__main__":
    assert len(sys.argv) == 2, "needs one parameter, the data filename."
    data = json.load(open(sys.argv[1]))

    trans = Transformer(data)
    #ys,xs = parse_data(data,trans)
    #bounds = trans.get_bounds()
    #print(xs)
    #print(ys)
    #print(bounds)
    res = next_point(data,trans)
    print(res)
    inv_res = trans.inverse_point(res[0])
    print(inv_res)
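# A short sketch for context (not the original code): the upper_confidence_bound /
# neg_upper_confidence_bound pair used above is typically built on top of a
# scikit-learn GaussianProcessRegressor; the `regressor` and `beta` names below
# are assumptions mirroring the snippet.
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor

def make_ucb(regressor: GaussianProcessRegressor, beta: float = 2.0):
    """Return UCB(x) = mean(x) + beta * stdev(x) and its negation."""
    def upper_confidence_bound(x):
        mean, stdev = regressor.predict(np.atleast_2d(x), return_std=True)
        return mean + beta * stdev

    def neg_upper_confidence_bound(x):
        # scipydirect.minimize minimizes, so the UCB is negated to maximize it
        return -upper_confidence_bound(x)

    return upper_confidence_bound, neg_upper_confidence_bound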
Example #2
def encode_char(c):
    return ord(c) - 32


def encode_smiles(string, start_char=EXTRA_CHARS['seq_start']):
    return torch.tensor([ord(start_char)] + [encode_char(c) for c in string],
                        dtype=torch.long)[:args.max_length].unsqueeze(0)


smiles_strings = [line.strip("\n") for line in open(args.data_path, "r")]
print("Loaded {0} SMILES strings from {1}".format(len(smiles_strings),
                                                  args.data_path))

print("Initializing Transformer...")
model = Transformer(ALPHABET_SIZE, args.embedding_size, args.num_layers).eval()
model = torch.nn.DataParallel(model)
print("Transformer Initialized.")

print("Loading pretrained weights from", args.checkpoint_path)
checkpoint = torch.load(args.checkpoint_path, map_location=torch.device("cpu"))
model.load_state_dict(checkpoint['state_dict'])
print("Pretrained weights loaded")
model = model.module.cpu()
encoder = model.encoder.cpu()

embeddings = []
with torch.no_grad():
    for smiles in smiles_strings:
        encoded = encode_smiles(smiles)
        mask = create_masks(encoded)
        # assumed completion: run the encoder and collect the resulting embedding
        embedding = encoder(encoded, mask)
        embeddings.append(embedding)
                class_truth = torch.empty(B_hat.shape[0], dtype=torch.long).fill_(class1_label).cpu()
                loss_cls_val = loss_cls(class_prediction, class_truth)
                loss = loss_mse_val + wt*loss_cls_val

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            print('[epoch %d/%d] loss: %f, mse_loss: %f, cls_loss: %f'%(epoch+1, n_epoch, loss.item(), loss_mse_val.item(), loss_cls_val.item()))
        torch.save({'state_dict':transformer.state_dict(), 'optimizer': optimizer.state_dict()}, save_model_path+'transformer_%d.pth'%(class1_label))

if __name__ == '__main__':
    feature_dim = 512
    n_classes = 80
    
    classes_centroids_npy = 'classes_centroids.npy'
    classes_centroids = np.load(classes_centroids_npy)
    ResNet_model_path = 'Res_trained_39.pkl'
    
    my_transformer = Transformer(feature_dim)
    # my_classifier = ResNetFeat.ResNet18(num_classes=80)
    # my_classifier.load_state_dict(torch.load(ResNet_model_path))
    my_classifier = torch.load(ResNet_model_path)
    optimizer = torch.optim.Adam(my_transformer.parameters(), lr=1e-4)
    # print(my_classifier)

    for i in range(n_classes):
        for j in range(i+1, n_classes):
            class1_centroids = classes_centroids[i]
            class2_centroids = classes_centroids[j]
            train(my_transformer, my_classifier, optimizer, class1_centroids, class2_centroids, i)
from torch.utils.data import DataLoader
from transformer import Transformer, Config
from data.dataset import CorpusDataset, TokenSentenceConverter
import tqdm
from evaluation.translate import translate_batch
from config import *

model = Transformer(Config(model_config))
model.load_state_dict(torch.load('model_state_dict/5epoch/transformer.pkl'))
model.cuda()
model.eval()
batch_size = 25
converter = TokenSentenceConverter('data/vocab.pkl')
dataset = CorpusDataset('data/corpus/test_en',
                        'data/corpus/test_cn',
                        converter,
                        to_token=False)
dataloader = DataLoader(dataset,
                        batch_size=batch_size,
                        shuffle=False,
                        collate_fn=lambda x:
                        ([s[0] for s in x], [s[1] for s in x]))
translate = lambda x: translate_batch(model, converter, [x])[1]
bleu1 = bleu2 = bleu3 = bleu4 = 0
dataloader = iter(dataloader)
batches = 2
translate_result = 'Top 50 Results:\n\n'
with torch.no_grad(), tqdm.tqdm(range(batches)) as t:
    for _ in t:
        src, tgt = next(dataloader)
        result, s = translate_batch(model, converter, src, tgt)
def do_evaluation(user_config, input_file_path, target_file_path,
                  pred_file_path):
    inp_language = user_config["inp_language"]
    target_language = user_config["target_language"]

    print("\n****Evaluating model from {} to {}****\n".format(
        inp_language, target_language))

    print("****Loading Sub-Word Tokenizers****")
    # load pre-trained tokenizer
    tokenizer_inp, tokenizer_tar = utils.load_tokenizers(
        inp_language, target_language, user_config)

    print("****Initializing DataLoader****")
    # dummy data loader. required for loading checkpoint
    dummy_dataloader = DataLoader(
        user_config["transformer_batch_size"],
        user_config["dummy_data_path_{}".format(inp_language)], None,
        tokenizer_inp, tokenizer_tar, inp_language, target_language, False)
    dummy_dataset = dummy_dataloader.get_data_loader()

    # data loader
    test_dataloader = DataLoader(user_config["transformer_batch_size"],
                                 input_file_path, target_file_path,
                                 tokenizer_inp, tokenizer_tar, inp_language,
                                 target_language, False)
    test_dataset = test_dataloader.get_data_loader()

    input_vocab_size = tokenizer_inp.vocab_size
    target_vocab_size = tokenizer_tar.vocab_size

    use_pretrained_emb = user_config["use_pretrained_emb"]
    if use_pretrained_emb:
        pretrained_weights_inp = np.load(
            user_config["pretrained_emb_path_{}".format(inp_language)])
        pretrained_weights_tar = np.load(
            user_config["pretrained_emb_path_{}".format(target_language)])
    else:
        pretrained_weights_inp = None
        pretrained_weights_tar = None

    transformer_model = Transformer(
        user_config["transformer_num_layers"],
        user_config["transformer_model_dimensions"],
        user_config["transformer_num_heads"],
        user_config["transformer_dff"],
        input_vocab_size,
        target_vocab_size,
        en_input=input_vocab_size,
        fr_target=target_vocab_size,
        rate=user_config["transformer_dropout_rate"],
        weights_inp=pretrained_weights_inp,
        weights_tar=pretrained_weights_tar)

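    # NOTE (assumption): this first pass over the dummy dataset builds the
    # model's variables so that load_weights() below can restore the checkpoint,
    # since subclassed Keras models defer weight creation until first call.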
    sacrebleu_metric(transformer_model, pred_file_path, None, tokenizer_tar,
                     dummy_dataset, tokenizer_tar.MAX_LENGTH)

    print("****Loading Model****")
    # load model
    model_path = user_config["model_file"]
    transformer_model.load_weights(model_path)

    print("****Generating Translations****")
    sacrebleu_metric(transformer_model, pred_file_path, target_file_path,
                     tokenizer_tar, test_dataset, tokenizer_tar.MAX_LENGTH)
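# sacrebleu_metric is a project-specific helper; as a rough sketch only (not the
# project's implementation), scoring a prediction file against a reference file
# with the sacrebleu package could look like this:
import sacrebleu

def score_files(pred_file_path, target_file_path):
    """Corpus BLEU between a file of hypotheses and a file of references."""
    with open(pred_file_path) as f:
        hypotheses = [line.strip() for line in f]
    with open(target_file_path) as f:
        references = [line.strip() for line in f]
    return sacrebleu.corpus_bleu(hypotheses, [references]).score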
Example #6
    n_pixels = 50
    total_pixels = int(n_pixels * n_pixels)
    pixel_scale = 0.1
    x_grid_in_radians, y_grid_in_radians = grid_2d_in_radians(
        n_pixels=n_pixels, pixel_scale=pixel_scale
    )
    grid_1d_in_radians = np.array([
        np.ndarray.flatten(y_grid_in_radians),
        np.ndarray.flatten(x_grid_in_radians)
    ]).T
    # plot_grid(x_grid=x_grid_in_radians, y_grid=y_grid_in_radians)
    # exit()

    transformer = Transformer(
        uv_wavelengths=uv_wavelengths,
        grid=grid_1d_in_radians,
        preload_transform=True
    )
    #print(transformer.cube_shape);exit()

    theta = [
        int(n_pixels / 2.0),
        int(n_pixels / 2.0),
        transformer.n_channels / 2.0,
        0.25,
        0.75 / pixel_scale,
        50.0,
        65.0,
        0.2 / pixel_scale,
        300.0,
        50.0
Example #7
def do_train(args):
    device = paddle.set_device("gpu" if args.use_cuda else "cpu")
    if args.eager_run:
        fluid.enable_dygraph(device)

    # set seed for CE
    random_seed = eval(str(args.random_seed))
    if random_seed is not None:
        fluid.default_main_program().random_seed = random_seed
        fluid.default_startup_program().random_seed = random_seed

    # define inputs
    inputs = [
        Input(
            [None, None], "int64", name="src_word"),
        Input(
            [None, None], "int64", name="src_pos"),
        Input(
            [None, args.n_head, None, None],
            "float32",
            name="src_slf_attn_bias"),
        Input(
            [None, None], "int64", name="trg_word"),
        Input(
            [None, None], "int64", name="trg_pos"),
        Input(
            [None, args.n_head, None, None],
            "float32",
            name="trg_slf_attn_bias"),
        Input(
            [None, args.n_head, None, None],
            "float32",
            name="trg_src_attn_bias"),
    ]
    labels = [
        Input(
            [None, 1], "int64", name="label"),
        Input(
            [None, 1], "float32", name="weight"),
    ]

    # define the data loaders
    (train_loader, train_steps_fn), (
        eval_loader, eval_steps_fn) = create_data_loader(args, device)

    # define model
    model = paddle.Model(
        Transformer(args.src_vocab_size, args.trg_vocab_size,
                    args.max_length + 1, args.n_layer, args.n_head, args.d_key,
                    args.d_value, args.d_model, args.d_inner_hid,
                    args.prepostprocess_dropout, args.attention_dropout,
                    args.relu_dropout, args.preprocess_cmd,
                    args.postprocess_cmd, args.weight_sharing, args.bos_idx,
                    args.eos_idx), inputs, labels)

    model.prepare(
        fluid.optimizer.Adam(
            learning_rate=fluid.layers.noam_decay(
                args.d_model,
                args.warmup_steps,
                learning_rate=args.learning_rate),
            beta1=args.beta1,
            beta2=args.beta2,
            epsilon=float(args.eps),
            parameter_list=model.parameters()),
        CrossEntropyCriterion(args.label_smooth_eps))

    # init from a checkpoint to resume the previous training
    if args.init_from_checkpoint:
        model.load(args.init_from_checkpoint)
    # init from a pretrained model to better solve the current task
    if args.init_from_pretrain_model:
        model.load(args.init_from_pretrain_model, reset_optimizer=True)

    # model train
    model.fit(train_data=train_loader,
              eval_data=eval_loader,
              epochs=args.epoch,
              eval_freq=1,
              save_freq=1,
              save_dir=args.save_model,
              callbacks=[
                  TrainCallback(
                      args,
                      train_steps_fn=train_steps_fn,
                      eval_steps_fn=eval_steps_fn)
              ])
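# The noam_decay schedule passed to the Adam optimizer above follows the warmup
# scheme from the original Transformer paper; a plain-Python sketch of the same
# formula (for reference only, not Paddle's implementation):
def noam_lr(step, d_model, warmup_steps, learning_rate=1.0):
    """lr = base * d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5)"""
    step = max(step, 1)
    return learning_rate * (d_model ** -0.5) * min(step ** -0.5,
                                                   step * warmup_steps ** -1.5)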
Example #8
def train(args):
    print("Start Time:\t{}".format(time.ctime()))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model1 = Transformer()
    model2 = Transformer()
    state_dict1 = torch.load(args.model1)
    state_dict2 = torch.load(args.model2)
    model1.load_state_dict(state_dict1)
    model2.load_state_dict(state_dict2)
    model1.to(device)
    model2.to(device)
    vgg = VGG16().to(device)

    train_dataset = datasets.ImageFolder(
        args.datapath,
        transforms.Compose([
            transforms.Resize(args.image_size),
            transforms.CenterCrop(args.image_size),
            transforms.ToTensor(),
            transforms.Lambda(lambda x: x.mul(255))
        ]))

    train_loader = DataLoader(train_dataset, batch_size=args.batch_size)
    transformer = Transformer(norm='instance', padding='reflect').to(device)
    optimizer = Adam(transformer.parameters(), args.lr)
    mse_loss = torch.nn.MSELoss()

    loss = []
    run_time = time.strftime("%d-%H-%M-%S")

    for epoch_num in range(args.epochs):
        transformer.train()
        agg_one_loss = 0.0
        agg_two_loss = 0.0
        count = 0

        for batch_id, (x, _) in enumerate(train_loader):
            n_batch = len(x)
            count += n_batch
            optimizer.zero_grad()
            content = x.to(device)

            y_hat = transformer(content)
            y_model1 = model1(content)
            y_model2 = model2(content)

            features_yh = vgg(normalize(y_hat))
            features_y1 = vgg(normalize(y_model1))
            features_y2 = vgg(normalize(y_model2))

            # Do this but with losses from the output of the VGG blocks
            # one_loss = mse_loss(y_hat, y_model1)
            # two_loss = mse_loss(y_hat, y_model2)
            one_loss = sum(
                mse_loss(feat_yh, feat_y1) for feat_yh, feat_y1 in zip(
                    features_yh.values(), features_y1.values()))
            two_loss = sum(
                mse_loss(feat_yh, feat_y2) for feat_yh, feat_y2 in zip(
                    features_yh.values(), features_y2.values()))

            total_loss = one_loss + two_loss
            total_loss.backward()
            optimizer.step()

            agg_one_loss += one_loss.item()
            agg_two_loss += two_loss.item()

            if (batch_id + 1) % args.log_interval == 0:
                mesg = "[{}/{}]\tTotal: {:.2f}\tModel 1: {:.2f}\tModel 2: {:.2f}".format(
                    count,
                    len(train_dataset),
                    (agg_one_loss + agg_two_loss) / (batch_id + 1),
                    agg_one_loss / (batch_id + 1),
                    agg_two_loss / (batch_id + 1),
                )
                print(mesg)

                loss.append([
                    batch_id + 1, agg_one_loss / (batch_id + 1),
                    agg_two_loss / (batch_id + 1),
                    (agg_one_loss + agg_two_loss) / (batch_id + 1)
                ])

            if args.checkpoint_dir is not None and (
                    batch_id + 1) % args.checkpoint_interval == 0:
                transformer.eval().cpu()
                ckpt_model_filename = "ckpt_epoch_" + str(
                    epoch_num + 1) + "_batch_id_" + str(batch_id + 1) + ".pth"
                ckpt_model_path = os.path.join(args.checkpoint_dir,
                                               ckpt_model_filename)
                torch.save(transformer.state_dict(), ckpt_model_path)
                transformer.to(device).train()
                save_loss_plot(
                    np.array(loss),
                    args.log_dir + '/train_loss{}.jpg'.format(run_time))

    # save model and parameter log
    transformer.eval().cpu()

    if args.savename is None:
        save_model_filename = "epoch_" + str(args.epochs) + "_" + str(
            time.strftime("%d-%H-%M-%S")) + ".model"
    else:
        save_model_filename = args.savename

    save_model_path = os.path.join(args.save_dir, save_model_filename)
    torch.save(transformer.state_dict(), save_model_path)

    # save loss in pickle file
    with open('{}/loss{}'.format(args.log_dir, run_time), 'wb') as fp:
        pickle.dump(loss, fp)

    with open('{}/param_log{}.txt'.format(args.log_dir, run_time), 'w') as f:
        f.write("Epochs: {}\n".format(args.epochs))
        f.write("Batch Size: {}\n".format(args.batch_size))
        f.write("Dataset: {}\n".format(args.datapath))
        f.write("Learning Rate: {}\n".format(args.lr))
        f.write("Model 1: {}\n".format(args.model1))
        f.write("Model 2: {}\n".format(args.model2))

    print("\nDone, trained model saved at", save_model_path)
                a[j] = 0
                if j == j1:
                    j1 -= 1
                else:
                    if j == j0:
                        j0 += 1
    p = 0.0
    current = 0
    i = n
    while current < x:
        i -= 1
        current += 1
        p += a[i]
    if math.fabs(p - 1) < 1e-10:
        p = 1
    return p


sx = [int(x) for x in open("sequence.txt")]
if max(sx) > 1:
    t = Transformer(sx)
    s = t.toUniform(0, 1)
else:
    # assumed: the sequence is already uniform on [0, 1], so use it as-is
    s = sx
chi(s)
serial(s)
gap(s)
poker(s)
permutation(s)
monotonic(s)
conflict(s)

nb = input()
Example #10
def main(args):
    # Construct Solver
    # data
    tr_dataset = AudioDataset(args.train_json,
                              args.batch_size,
                              args.maxlen_in,
                              args.maxlen_out,
                              batch_frames=args.batch_frames)
    cv_dataset = AudioDataset(args.valid_json,
                              args.batch_size,
                              args.maxlen_in,
                              args.maxlen_out,
                              batch_frames=args.batch_frames)
    tr_loader = AudioDataLoader(tr_dataset,
                                batch_size=1,
                                num_workers=args.num_workers,
                                shuffle=args.shuffle,
                                LFR_m=args.LFR_m,
                                LFR_n=args.LFR_n)
    cv_loader = AudioDataLoader(cv_dataset,
                                batch_size=1,
                                num_workers=args.num_workers,
                                LFR_m=args.LFR_m,
                                LFR_n=args.LFR_n)
    # load dictionary and generate char_list, sos_id, eos_id
    char_list, sos_id, eos_id = process_dict(args.dict)
    vocab_size = len(char_list)
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}
    # model
    encoder = Encoder(args.d_input * args.LFR_m,
                      args.n_layers_enc,
                      args.n_head,
                      args.d_k,
                      args.d_v,
                      args.d_model,
                      args.d_inner,
                      dropout=args.dropout,
                      pe_maxlen=args.pe_maxlen)
    decoder = Decoder(
        sos_id,
        eos_id,
        vocab_size,
        args.d_word_vec,
        args.n_layers_dec,
        args.n_head,
        args.d_k,
        args.d_v,
        args.d_model,
        args.d_inner,
        dropout=args.dropout,
        tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
        pe_maxlen=args.pe_maxlen)
    model = Transformer(encoder, decoder)
    print(model)
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    #model.cuda()
    model.to(device)
    # optimizer
    optimizer = TransformerOptimizer(
        torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        args.k, args.d_model, args.warmup_steps)

    # solver
    solver = Solver(data, model, optimizer, args)
    solver.train()
Example #11
from transformer import Transformer
import torch

transformer_model = Transformer(nhead=4, num_encoder_layers=2)

src = torch.rand((10, 32, 512))
# src is (seq_len=10, batch=32, d_model=512): 10 tokens per sentence, batch of 32, 512-dimensional word representations
tgt = torch.rand((20, 32, 512))
out, loss = transformer_model(src, tgt)

print(out.shape)

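# For comparison, PyTorch's built-in torch.nn.Transformer uses the same
# (seq_len, batch, d_model) layout but its forward pass returns only the decoded
# sequence (no loss); a minimal sketch:
import torch
import torch.nn as nn

builtin = nn.Transformer(d_model=512, nhead=4, num_encoder_layers=2,
                         num_decoder_layers=2)
src = torch.rand(10, 32, 512)   # (source length, batch, d_model)
tgt = torch.rand(20, 32, 512)   # (target length, batch, d_model)
out = builtin(src, tgt)         # -> torch.Size([20, 32, 512])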
Example #12
                       collate_fn=generateBatch)
testIter = DataLoader(testData,
                      batch_size=BATCH_SIZE,
                      shuffle=True,
                      collate_fn=generateBatch)

### BUILD MODEL
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = Transformer(
    embeddingSize=256,
    srcVocabSize=len(sourceVocab),
    trgVocabSize=len(targetVocab),
    srcPadIdx=PAD_IDX,
    numHeads=8,
    numEncoderLayers=3,
    numDecoderLayers=3,
    forwardExpansion=4,
    dropout=0.2,
    maxLen=350,
    device=device,
).to(device)

optimizer = optim.Adam(model.parameters(), lr=0.0003)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                       factor=0.1,
                                                       patience=10,
                                                       verbose=True)
criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)

### TRAIN AND EVALUATE
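# The training loop itself is not shown; a minimal sketch of one epoch with the
# model, optimizer, scheduler, and criterion defined above (the assumption being
# that trainIter yields (src, trg) batches of token indices shaped
# (seq_len, batch), as the custom Transformer here expects):
import torch

def train_one_epoch(model, trainIter, optimizer, criterion, device):
    model.train()
    epoch_loss = 0.0
    for src, trg in trainIter:
        src, trg = src.to(device), trg.to(device)
        optimizer.zero_grad()
        output = model(src, trg[:-1, :])          # predict the next token
        loss = criterion(output.reshape(-1, output.shape[-1]),
                         trg[1:, :].reshape(-1))  # targets shifted by one
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(trainIter)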
Example #13
def main():
    train_data = SentenceDataset(args.train_file,
                                 encoding_type=args.encoding_type,
                                 filter_threshold=args.filter_threshold)
    val_data = SentenceDataset(args.val_file,
                               encoding_type=args.encoding_type,
                               filter_threshold=args.filter_threshold)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               args.batch_size,
                                               shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_data, args.batch_size)

    print(len(train_loader))

    input_dim = len(train_data.vocab.source_vocab)
    output_dim = len(train_data.vocab.target_vocab)
    static = args.embedding_type == 'static'

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    enc_embedding = Embeddings(input_dim, args.hidden_dim, args.max_len,
                               device, static)
    encoder_layer = EncoderLayer(args.hidden_dim, args.num_enc_heads,
                                 args.inner_dim, args.dropout)
    encoder = Encoder(enc_embedding, encoder_layer, args.num_enc_layers,
                      args.dropout)

    dec_embedding = Embeddings(input_dim, args.hidden_dim, args.max_len,
                               device, static)
    decoder_layer = DecoderLayer(args.hidden_dim, args.num_dec_heads,
                                 args.inner_dim, args.dropout)
    decoder = Decoder(output_dim, args.hidden_dim, dec_embedding,
                      decoder_layer, args.num_dec_layers, args.dropout)

    pad_id = train_data.vocab.source_vocab['<pad>']

    model = Transformer(encoder, decoder, pad_id, device)

    print('Transformer has {:,} trainable parameters'.format(
        count_parames(model)))

    if args.load_model is not None:
        model.load(args.load_model)
    else:
        model.apply(init_weights)

    if args.mode == 'test':
        inferencer = Inferencer(model, train_data.vocab, device)
        greedy_out = inferencer.infer_greedy(
            'helo world, I m testin a typo corector')
        print(greedy_out)

    elif args.mode == 'train':
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

        loss_function = nn.NLLLoss(ignore_index=pad_id)

        print('Started training...')
        train(model, train_loader, val_loader, optimizer, loss_function,
              device)

    else:
        raise ValueError('Mode not recognized')
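# count_parames is a small project helper not shown above; a typical
# implementation matching that call (the body is an assumption) is:
def count_parames(model):
    """Number of trainable parameters in a PyTorch module."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)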
Example #14
    def __create_transformer_block(self, num_transformers, dropout=0.3):
        transformers = []
        for i in range(num_transformers):
            transformers.append(Transformer(dim_embedding=self.dim_embedding,
                                            num_heads=self.num_heads,
                                            dropout=dropout))
        return nn.Sequential(*transformers)
Example #15
                print('Steps {} Loss {:.4f}'.format(s, train_loss.result()))
            self.train_step(self.train_iter.next())
        print('Steps {} Loss {:.4f}'.format(steps, train_loss.result()))
        self.model.save()
        print('model saved')
        print('training finished')


if __name__ == "__main__":
    #train_data = VQA(r'D:\documents\coding\Data\coco\v2_mscoco_train2014_annotations.json',
    #r'D:\documents\coding\Data\coco\v2_OpenEnded_mscoco_train2014_questions.json',
    #r'D:\documents\coding\Data\coco\train2014\COCO_train2014_{0}.jpg',
    #r'D:\documents\coding\Data\coco\v2_mscoco_train2014_complementary_pairs.json')
    train_data = VQA(
        r'D:\lgy\Document\Python\Data\coco\v2_mscoco_train2014_annotations.json',
        r'D:\lgy\Document\Python\Data\coco\v2_OpenEnded_mscoco_train2014_questions.json',
        r'D:\lgy\Document\Python\Data\coco\train2014\COCO_train2014_{0}.jpg')

    train_iter = VQAIter(train_data,
                         train_data.getQuesIds(ansTypes=['other', 'yes/no']),
                         hp.batch_size, hp.num_chunks)

    max_qst_len = hp.max_qst_len
    max_ans_len = hp.max_ans_len

    model = Transformer(hp.num_layers, hp.d_model, hp.num_heads, hp.dff,
                        max_qst_len + 3, hp.dropout_rate)
    trainer = Trainer(train_iter, model, 16, max_qst_len, max_ans_len)
    trainer.train(hp.steps, hp.steps_per_save, hp.steps_per_chunk,
                  hp.steps_per_report)
Example #16
def evaluate_transformer():
    tokenizer_en = tfds.features.text.SubwordTextEncoder.load_from_file(os.path.join(output_path, tag_new_tok + "tokenizer_en_" + str(DICT_SIZE)))
    tokenizer_de = tfds.features.text.SubwordTextEncoder.load_from_file(os.path.join(output_path, tag_new_tok + "tokenizer_de_" + str(DICT_SIZE)))
    input_vocab_size = tokenizer_de.vocab_size + 2
    target_vocab_size = tokenizer_en.vocab_size + 2

    transformer1 = Transformer(num_layers, d_model, num_heads, dff,
                              input_vocab_size, target_vocab_size,
                              pe_input=input_vocab_size,
                              pe_target=target_vocab_size,
                              rate=dropout_rate)


    ckpt = tf.train.Checkpoint(transformer1=transformer1)
    ckpt.restore(tf.train.latest_checkpoint(checkpoint_path)).expect_partial()
    print('Latest checkpoint restored!!')
    examples, metadata = tfds.load('wmt14_translate/de-en', data_dir=data_path, with_info=True,
                                   as_supervised=True)
    test_examples = examples['test']

    def predict(inp_sentence):
      start_token = [tokenizer_de.vocab_size]
      end_token = [tokenizer_de.vocab_size + 1]

      # inp sentence is german, hence adding the start and end token
      inp_sentence = start_token + tokenizer_de.encode(inp_sentence) + end_token
      encoder_input = tf.expand_dims(inp_sentence, 0)

      # as the target is english, the first word to the transformer should be the
      # english start token.
      decoder_input = [tokenizer_en.vocab_size]
      output = tf.expand_dims(decoder_input, 0)
      

      # predictions.shape == (batch_size, seq_len, vocab_size)
      def symbols_to_logits(output):          
          batched_input = tf.tile(encoder_input, [beam_width, 1])
          enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
            batched_input, output)
          predictions, attention_weights = transformer1(batched_input,
                                                     output,
                                                     False,
                                                     enc_padding_mask,
                                                     combined_mask,
                                                     dec_padding_mask)
          predictions = predictions[:, -1, :]

          return  predictions
      
      finished_seq, finished_scores, states= beam_search(symbols_to_logits,
                 output,
                 beam_width,
                 MAX_LENGTH,
                 target_vocab_size,
                 alpha,
                 states=None,
                 eos_id=tokenizer_en.vocab_size+1,
                 stop_early=True,
                 use_tpu=False,
                 use_top_k_with_unique=True)
      
      return finished_seq[0, 0, :]

    def translate(sentence):
      result = predict(sentence)
      predicted_sentence = tokenizer_en.decode([i for i in result
                                                if i < tokenizer_en.vocab_size])

      print('Input: {}'.format(sentence))
      print('Predicted translation: {}'.format(predicted_sentence))
      return  predicted_sentence

    translations = []
    inputs = []
    targets = []
    BLEUs = []
    for sentence in test_examples:
        inp = sentence[0].numpy().decode('utf-8')
        target = sentence[1].numpy().decode('utf-8')
        translation = translate(inp)
        BLEU = nltk.translate.bleu_score.sentence_bleu([nltk.word_tokenize(target)], nltk.word_tokenize(translation))
        translations.append(translation)
        inputs.append(inp)
        BLEUs.append(BLEU)
        print('Average BLEU score: ', 100 * np.mean(BLEUs))
        targets.append(target)

    d = {'input': inputs, 'target': targets, 'translation': translations, 'BLEU': BLEUs}
    df = pd.DataFrame.from_dict(d)
    df.to_csv(os.path.join(output_path, 'results_'+experiment_name+'.csv'))
    print('Average BLEU score: ', 100 * np.mean(BLEUs))
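# Note that the loop above averages sentence-level BLEU, which generally differs
# from corpus-level BLEU; with the same nltk calls, the corpus score over the
# collected pairs could be computed like this (a sketch):
import nltk

def corpus_bleu_score(targets, translations):
    """Corpus-level BLEU (0-100) over tokenized references and hypotheses."""
    references = [[nltk.word_tokenize(t)] for t in targets]
    hypotheses = [nltk.word_tokenize(h) for h in translations]
    return 100 * nltk.translate.bleu_score.corpus_bleu(references, hypotheses)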
    def transform(self, data):
        transformer = Transformer()
        transformer.transform_categorial(data, self.categorial_name, self.C)
        return transformer.data
Example #18
from prefect import Flow, task
from extracter import Extracter
from transformer import Transformer
from loader import Loader

with Flow("ETL") as flow:
    url = 'https://www.marketbeat.com/stocks/NASDAQ/MSFT/price-target/?MostRecent=0'
    e = Extracter(url).extract()
    df = Transformer().transform(text=e)
    l = Loader().load(df)

flow.run()
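# Extracter, Transformer, and Loader here are user modules rather than Prefect
# built-ins; as a loose sketch only (the class name mirrors the import above and
# the parsing strategy is an assumption), the transform step might turn the
# fetched HTML into a DataFrame with pandas:
import pandas as pd

class Transformer:
    def transform(self, text):
        """Parse HTML tables out of the fetched page into a DataFrame."""
        tables = pd.read_html(text)   # raises ValueError if no tables are found
        return tables[0]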


Example #19
def train(fv, model_name, criterion, balance=False, batchsize=64, size=0):
    if fv == "matlab":
        dloader = matloader
    else:
        dloader = fvloader

    train_data = dloader.load_train_data(size=size, balance=balance, fv=fv)
    val_data = dloader.load_val_data(size=size, fv=fv)
    test_data = dloader.load_test_data(size=size, fv=fv)
    # model_name = "transformer_%s_size%d_bce" % (fv, size)
    model_dir = os.path.join("./modeldir/%s" % model_name)
    model_pth = os.path.join(model_dir, "model.pth")

    writer = tensorboardX.SummaryWriter(model_dir)

    if os.path.exists(model_pth):
        print("------load model--------")
        model = torch.load(model_pth)
    else:
        # model = Transformer(fv, NUM_HEADS=4, NUM_LAYERS=3).cuda()
        model = Transformer(fv).cuda()
    model = nn.DataParallel(model)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=0.0001, weight_decay=0.001)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    #         optimizer, factor=0.5,
    #         patience=30, min_lr=1e-4)

    epochs = 2000
    step = 1
    val_step = 1
    max_f1 = 0.0

    for e in range(epochs):
        model.train()
        print("------epoch--------", e)
        st = time.time()

        train_shuffle = fvloader.shuffle(train_data)
        for item in fvloader.batch_fv(train_shuffle, batch=batchsize):

            # for name, param in model.named_parameters():
            #     writer.add_histogram(
            #         name, param.clone().cpu().data.numpy(), step)

            # writer.add_histogram(
            #     "grad/"+name, param.grad.clone().cpu().data.numpy(), step)
            model.zero_grad()

            genes, nimgs, labels, timesteps = item
            inputs = torch.from_numpy(nimgs).type(torch.cuda.FloatTensor)

            gt = torch.from_numpy(labels).type(torch.cuda.FloatTensor)
            pd = model(inputs)

            # loss = criterion(pd, gt)
            all_loss = criterion(pd, gt)
            label_loss = torch.mean(all_loss, dim=0)
            loss = torch.mean(label_loss)
            # for i in range(6):
            #     writer.add_scalar("train sl_%d_loss" % i,
            #                       label_loss[i].item(), step)

            train_pd = torch_util.threshold_tensor_batch(pd)
            np_pd = train_pd.data.cpu().numpy()
            torch_util.torch_metrics(
                labels, np_pd, writer, step, mode="train")

            writer.add_scalar("train loss", loss, step)
            loss.backward()
            optimizer.step()
            step += 1

        et = time.time()
        writer.add_scalar("train time", et - st, e)
        for param_group in optimizer.param_groups:
            writer.add_scalar("lr", param_group['lr'], e)

        # run_origin_train(model, imbtrain_data, writer, e, criterion)

        if e % 1 == 0:
            val_loss, val_f1 = run_val(
                model, dloader, val_data, writer, val_step, criterion)
            # scheduler.step(val_loss)
            val_step += 1
            if e == 0:
                start_loss = val_loss
                min_loss = start_loss

            # if val_loss > 2 * min_loss:
            #     print("early stopping at %d" % e)
            #     break
            # if e % 50 == 0:
            #     pt = os.path.join(model_dir, "%d.pt" % e)
            #     torch.save(model.state_dict(), pt)
            #     result = os.path.join(model_dir, "result_epoch%d.txt" % e)
            #     run_test(model, test_data, result)

            if min_loss > val_loss or max_f1 < val_f1:
                if min_loss > val_loss:
                    print("---------save best----------", "loss", val_loss)
                    min_loss = val_loss
                if max_f1 < val_f1:
                    print("---------save best----------", "f1", val_f1)
                    max_f1 = val_f1
                torch.save(model, model_pth)
                result = os.path.join(model_dir, "result_epoch%d.txt" % e)
                run_test(model, dloader, test_data, result)
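# run_val, run_test, and torch_util.torch_metrics are project utilities; purely
# as an illustration (not the project's code), a macro F1 over thresholded
# multi-label predictions can be computed with scikit-learn:
from sklearn.metrics import f1_score

def macro_f1(labels, predictions):
    """Macro-averaged F1 for binary multi-label arrays of shape (N, num_labels)."""
    return f1_score(labels, predictions, average="macro")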
import os, sys
import dataloader as dd
from keras.optimizers import *
from keras.callbacks import *

itokens, otokens = dd.MakeS2SDict('data/pinyin.corpus.txt',
                                  dict_file='data/pinyin_word.txt')

print('seq 1 words:', itokens.num())
print('seq 2 words:', otokens.num())

from transformer import Transformer, LRSchedulerPerStep

d_model = 256
s2s = Transformer(itokens, otokens, len_limit=500, d_model=d_model, d_inner_hid=1024, \
       n_head=4, layers=3, dropout=0.1)

mfile = 'models/pinyin.model.h5'
lr_scheduler = LRSchedulerPerStep(d_model, 4000)
model_saver = ModelCheckpoint(mfile,
                              monitor='ppl',
                              save_best_only=True,
                              save_weights_only=True)

#s2s.model.summary()
opt = Adam(0.001, 0.9, 0.98, epsilon=1e-9)
s2s.compile(opt)

try:
    s2s.model.load_weights(mfile)
except:
    print('\n\nnew model')
Example #21
        # embedding size
        max_length=100,
        hidden_units=512,
        dropout_rate=0.1,
        lr=0.0001,
        is_training=True)
    return params


arg = create_hparams()
arg.input_vocab_size = len(en_vocab)
arg.label_vocab_size = len(zh_vocab)
arg.is_training = False
arg.dropout_rate = 0.

g = Transformer(arg)

saver = tf.train.Saver()

de_zh_vocab = {v: k for k, v in zh_vocab.items()}

with tf.Session() as sess:
    saver.restore(sess, 'tmp/model.ckpt')
    for i in range(100):
        line = encoder_inputs[i * 1000]
        x = np.array(line)
        x = x.reshape(1, -1)
        de_inp = [[zh_vocab['<GO>']]]
        while True:
            y = np.array(de_inp)
            preds = sess.run(g.preds, {g.x: x, g.de_inp: y})
            # assumed greedy-decoding completion: append the newest prediction
            # and stop once the end-of-sequence token is produced
            if preds[0][-1] == zh_vocab['<EOS>']:
                break
            de_inp[0].append(preds[0][-1])
Example #22
    def _setup(self, config):
        print('NaruTrainer config:', config)
        os.chdir(config["cwd"])
        for k, v in config.items():
            setattr(self, k, v)
        self.epoch = 0

        if callable(self.text_eval_corpus):
            self.text_eval_corpus = self.text_eval_corpus()

        # Try to make all the runs the same, except for input orderings.
        torch.manual_seed(0)
        np.random.seed(0)

        assert self.dataset in [
            'dmv', 'dmv-full', 'census',
            'synthetic', 'kdd', 'kdd-full', 'url', 'url-tiny', 'dryad-urls',
            'dryad-urls-small'
        ]
        if self.shuffle_at_data_level:
            data_order_seed = self.order_seed
        else:
            data_order_seed = None
        if self.dataset == 'dmv-full':
            table = datasets.LoadDmv(full=True, order_seed=data_order_seed)
        elif self.dataset == 'dmv':
            table = datasets.LoadDmv(order_seed=data_order_seed)
        elif self.dataset == 'synthetic':
            table = datasets.LoadSynthetic(order_seed=data_order_seed)
        elif self.dataset == 'census':
            table = datasets.LoadCensus(order_seed=data_order_seed)
        elif self.dataset == 'kdd':
            table = datasets.LoadKDD(order_seed=data_order_seed)
        elif self.dataset == 'kdd-full':
            table = datasets.LoadKDD(full=True, order_seed=data_order_seed)
        elif self.dataset == 'url-tiny':
            table = datasets.LoadURLTiny()
        elif self.dataset == 'dryad-urls':
            table = datasets.LoadDryadURLs()
        elif self.dataset == 'dryad-urls-small':
            table = datasets.LoadDryadURLs(small=True)
        self.table = table
        self.oracle = Oracle(
            table, cache_dir=os.path.expanduser("~/oracle_cache"))
        try:
            self.table_bits = Entropy(
                self.table,
                self.table.data.fillna(value=0).groupby(
                    [c.name for c in table.columns]).size(), [2])[0]
        except Exception as e:
            print("Error computing table bits", e)
            self.table_bits = 0  # TODO(ekl) why does dmv-full crash on ec2

        fixed_ordering = None
        if self.special_orders <= 1:
            fixed_ordering = list(range(len(table.columns)))

        if self.entropy_order:
            assert self.num_orderings == 1
            res = []
            for i, c in enumerate(table.columns):
                bits = Entropy(c.name, table.data.groupby(c.name).size(), [2])
                res.append((bits[0], i))
            s = sorted(res, key=lambda b: b[0], reverse=self.reverse_entropy)
            fixed_ordering = [t[1] for t in s]
            print('Using fixed ordering:', '_'.join(map(str, fixed_ordering)))
            print(s)

        if self.order is not None:
            print('Using passed-in order:', self.order)
            fixed_ordering = self.order

        if self.order_seed is not None and not self.shuffle_at_data_level:
            if self.order_seed == "reverse":
                fixed_ordering = fixed_ordering[::-1]
            else:
                rng = np.random.RandomState(self.order_seed)
                rng.shuffle(fixed_ordering)
            print('Using generated order:', fixed_ordering)

        print(table.data.info())
        self.fixed_ordering = fixed_ordering

        table_train = table

        if self.special_orders > 0:
            special_orders = _SPECIAL_ORDERS[self.dataset][:self.special_orders]
            k = len(special_orders)
            seed = self.special_order_seed * 10000
            for i in range(k, self.special_orders):
                special_orders.append(
                    np.random.RandomState(seed + i - k + 1).permutation(
                        np.arange(len(table.columns))))
            print('Special orders', np.array(special_orders))
        else:
            special_orders = []

        if self.use_transformer:
            args = {
                "num_blocks": 4,
                "d_model": 64,
                "d_ff": 256,
                "num_heads": 4,
                "nin": len(table.columns),
                "input_bins": [c.DistributionSize() for c in table.columns],
                "use_positional_embs": True,
                "activation": "gelu",
                "fixed_ordering": fixed_ordering,
                "dropout": False,
                "seed": self.seed,
                "first_query_shared": False,
                "prefix_dropout": self.prefix_dropout,
                "mask_scheme": 0,  # XXX only works for default order?
            }
            args.update(self.transformer_args)
            model = Transformer(**args).to(get_device())
        else:
            model = MakeMade(
                scale=self.fc_hiddens,
                cols_to_train=table.columns,
                seed=self.seed,
                dataset=self.dataset,
                fixed_ordering=fixed_ordering,
                special_orders=special_orders,
                layers=self.layers,
                residual=self.residual,
                embed_size=self.embed_size,
                dropout=self.dropout,
                per_row_dropout=self.per_row_dropout,
                prefix_dropout=self.prefix_dropout,
                fixed_dropout_ratio=self.fixed_dropout_ratio,
                input_no_emb_if_leq=self.input_no_emb_if_leq,
                disable_learnable_unk=self.disable_learnable_unk,
                embs_tied=self.embs_tied)

        child = None

        print(model.nin, model.nout, model.input_bins)
        blacklist = None
        mb = ReportModel(model, blacklist=blacklist)
        self.mb = mb

        if not isinstance(model, Transformer):
            print('applying weight_init()')
            model.apply(weight_init)

        if isinstance(model, Transformer):
            opt = torch.optim.Adam(
                list(model.parameters()) + (list(child.parameters())
                                            if child else []),
                2e-4,
                betas=(0.9, 0.98),
                eps=1e-9,
            )
        else:
            opt = torch.optim.Adam(
                list(model.parameters()) + (list(child.parameters())
                                            if child else []), 2e-4)

        self.train_data = TableDataset(table_train)

        self.model = model
        self.opt = opt

        if self.checkpoint_to_load:
            self.model.load_state_dict(torch.load(self.checkpoint_to_load))
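# weight_init, MakeMade, and ReportModel come from the surrounding project; a
# rough sketch of what a weight_init applied via model.apply(...) typically does
# (an assumption, not the repository's exact initializer):
import torch.nn as nn

def weight_init(module):
    """Kaiming-initialize Linear layers; leave other modules untouched."""
    if isinstance(module, nn.Linear):
        nn.init.kaiming_uniform_(module.weight)
        if module.bias is not None:
            nn.init.zeros_(module.bias)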
Example #23
class Graph:
    transformer = Transformer()

    def __init__(self):
        return
Example #24
if config.run_tensorboard:
    from input_path import train_summary_writer, valid_summary_writer
else:
    train_summary_writer = None
    valid_summary_writer = None

#tokenizer_en = tfds.features.text.SubwordTextEncoder.load_from_file(file_path.subword_vocab_path)

train_dataset, val_dataset = create_train_data()
train_loss, train_accuracy = get_loss_and_accuracy()
validation_loss, validation_accuracy = get_loss_and_accuracy()

transformer = Transformer(num_layers=config.num_layers,
                          d_model=config.d_model,
                          num_heads=config.num_heads,
                          dff=config.dff,
                          input_vocab_size=config.input_vocab_size,
                          target_vocab_size=config.target_vocab_size,
                          rate=config.dropout_rate)
generator = Generator()

# The @tf.function trace-compiles train_step into a TF graph for faster
# execution. The function specializes to the precise shape of the argument
# tensors. To avoid re-tracing due to the variable sequence lengths or variable
# batch sizes (the last batch is smaller), use input_signature to specify
# more generic shapes.

train_step_signature = [
    tf.TensorSpec(shape=(None, None), dtype=tf.int64),
    tf.TensorSpec(shape=(None, None), dtype=tf.int64),
    tf.TensorSpec(shape=(None), dtype=tf.int32),
Example #25
    def __init__(self, cfg):
        super(DETR, self).__init__()

        self.device = torch.device(cfg.MODEL.DEVICE)

        # Build Backbone
        self.backbone = build_backbone(cfg)
        # Build Transformer
        self.transformer = Transformer(cfg)

        self.aux_loss = not cfg.MODEL.DETR.NO_AUX_LOSS
        self.num_classes = cfg.MODEL.DETR.NUM_CLASSES
        self.num_queries = cfg.MODEL.DETR.NUM_QUERIES
        hidden_dim = self.transformer.d_model

        # Build FFN
        self.class_embed = nn.Linear(hidden_dim, self.num_classes + 1)
        self.bbox_embed = MLP(hidden_dim, hidden_dim, 4, 3)
        # Build Object Queries
        self.query_embed = nn.Embedding(self.num_queries, hidden_dim)

        backbone_out_shapes = self.backbone.output_shape()["res5"]
        self.input_proj = nn.Conv2d(backbone_out_shapes.channels,
                                    hidden_dim,
                                    kernel_size=1)

        self.position_embedding = position_embedding[
            cfg.MODEL.DETR.POSITION_EMBEDDING](
                num_pos_feats=hidden_dim // 2,
                temperature=cfg.MODEL.DETR.get("TEMPERATURE", 10000),
                normalize=True if cfg.MODEL.DETR.POSITION_EMBEDDING else False,
                scale=None,
            )

        self.weight_dict = {
            "loss_ce": 1.0,
            "loss_bbox": cfg.MODEL.DETR.BBOX_LOSS_COEFF,
            "loss_giou": cfg.MODEL.DETR.GIOU_LOSS_COEFF,
        }

        if self.aux_loss:
            self.aux_weight_dict = {}
            for i in range(cfg.MODEL.DETR.TRANSFORMER.NUM_DEC_LAYERS - 1):
                self.aux_weight_dict.update(
                    {k + f"_{i}": v
                     for k, v in self.weight_dict.items()})
            self.weight_dict.update(self.aux_weight_dict)

        losses = ["labels", "boxes", "cardinality"]

        matcher = HungarianMatcher(
            cost_class=cfg.MODEL.DETR.COST_CLASS,
            cost_bbox=cfg.MODEL.DETR.COST_BBOX,
            cost_giou=cfg.MODEL.DETR.COST_GIOU,
        )

        self.criterion = SetCriterion(self.num_classes,
                                      matcher=matcher,
                                      weight_dict=self.weight_dict,
                                      eos_coef=cfg.MODEL.DETR.EOS_COEFF,
                                      losses=losses)

        self.post_processors = {'bbox': PostProcess()}

        pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
            3, 1, 1)
        pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
            3, 1, 1)
        self.normalizer = lambda x: (x - pixel_mean) / pixel_std

        self.to(self.device)
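# The GIoU term weighted by GIOU_LOSS_COEFF compares predicted and target boxes;
# a small sketch of how such a matching cost can be computed with torchvision ops
# (the cxcywh -> xyxy conversion is the usual DETR box convention, assumed here):
from torchvision.ops import box_convert, generalized_box_iou

def giou_cost(pred_boxes_cxcywh, tgt_boxes_cxcywh):
    """Negative pairwise generalized IoU, shape (num_pred, num_tgt)."""
    pred = box_convert(pred_boxes_cxcywh, in_fmt="cxcywh", out_fmt="xyxy")
    tgt = box_convert(tgt_boxes_cxcywh, in_fmt="cxcywh", out_fmt="xyxy")
    return -generalized_box_iou(pred, tgt)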
Example #26
    params = TransformerParams()

    logger = get_logger('validation', params.experiment_dir)
    logger.info("Logging to {}".format(params.experiment_dir))

    # preprocess data
    dataset = tf.data.Dataset.from_tensor_slices(
        (questions_encoded, answers_encoded))
    input_data = dataset.take(params.num_examples).shuffle(questions_encoded.shape[0]).batch(params.batch_size) \
        .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
    train_data = input_data.take(params.num_training_batches).repeat(
        params.num_epochs)
    valid_data = input_data.skip(params.num_training_batches)

    model = Transformer(params)
    model.train(params, train_data, valid_data, logger)
    # model.inference()
'''
HN NOTE:
For generalizability of the training pipeline, train steps should be methods
of the model, and individual train steps should output masked preds + targets,
but the training loop itself should stay general. The loop should be similar
to the current one in lstm.py and contain:
- TensorBoard logging
- Validation loss + accuracy every n steps
- Early-stopping check
- Outputting samples
- Model checkpointing
'''
                            look_ahead_mask, dec_padding_mask)
        else:
            return self.predict(input_ids,
                               draft_decoder_type=decoder_type,
                               beam_size=beam_size,
                               length_penalty=length_penalty,
                               temperature=temperature, 
                               top_p=top_p,
                               top_k=top_k)

if config.model == 'transformer':
    Model = Transformer(
                       num_layers=config.num_layers, 
                       d_model=config.d_model, 
                       num_heads=config.num_heads, 
                       dff=config.dff, 
                       input_vocab_size=config.input_vocab_size, 
                       target_vocab_size=config.target_vocab_size,
                       add_pointer_generator=config.add_pointer_generator
                       )
        
elif config.model == 'bertified_transformer':
    Model = Bertified_transformer(
                                  num_layers=config.num_layers, 
                                  d_model=config.d_model, 
                                  num_heads=config.num_heads, 
                                  dff=config.dff, 
                                  input_vocab_size=config.input_vocab_size,
                                  target_vocab_size=config.target_vocab_size,
                                  add_pointer_generator=config.add_pointer_generator
                                  )
def evaluate_transformer():
    tokenizer_en = tfds.features.text.SubwordTextEncoder.load_from_file(
        os.path.join(output_path,
                     tag_new_tok + "tokenizer_en_" + str(DICT_SIZE)))
    tokenizer_de = tfds.features.text.SubwordTextEncoder.load_from_file(
        os.path.join(output_path,
                     tag_new_tok + "tokenizer_de_" + str(DICT_SIZE)))
    input_vocab_size = tokenizer_en.vocab_size + 2
    target_vocab_size = tokenizer_de.vocab_size + 2

    # using transformer2 as eng-> de
    transformer2 = Transformer(num_layers,
                               d_model,
                               num_heads,
                               dff,
                               input_vocab_size,
                               target_vocab_size,
                               pe_input=input_vocab_size,
                               pe_target=target_vocab_size,
                               rate=dropout_rate)

    ckpt = tf.train.Checkpoint(transformer2=transformer2)
    ckpt.restore(tf.train.latest_checkpoint(checkpoint_path)).expect_partial()
    print('Latest checkpoint restored!!')
    # loading different part of training set for backtrans (before :TRAIN_ON)
    train_on_end = TRAIN_ON + train_backtrans_on
    split = tfds.Split.TRAIN.subsplit(tfds.percent[TRAIN_ON:train_on_end])
    print('Split is: {}'.format(split))
    examples, metadata = tfds.load('wmt14_translate/de-en',
                                   data_dir=data_path,
                                   with_info=True,
                                   as_supervised=True,
                                   split=split)

    def filter_max_length(x, y, max_length=MAX_LENGTH):
        """Function restricting used sequences x and y to <= max_lenght"""
        return tf.logical_and(
            tf.size(x) <= max_length,
            tf.size(y) <= max_length)

    examples = examples.filter(filter_max_length)
    train_examples4backtrans = examples
    print('type of train_examples4backtrans: {}'.format(
        type(train_examples4backtrans)))
    print('shape of train_examples4backtrans: {}'.format(
        tf.data.experimental.cardinality(train_examples4backtrans)))
    dataset_length = [i
                      for i, _ in enumerate(train_examples4backtrans)][-1] + 1

    def predict(inp_sentence):
        start_token = [tokenizer_en.vocab_size]
        end_token = [tokenizer_en.vocab_size + 1]

        # inp sentence is ENGLISH, hence adding the start and end token
        inp_sentence = start_token + tokenizer_en.encode(
            inp_sentence) + end_token
        encoder_input = tf.expand_dims(inp_sentence, 0)

        # as the target is GERMAN, the first word to the transformer should be the
        # english start token.
        decoder_input = [tokenizer_de.vocab_size]
        output = tf.expand_dims(decoder_input, 0)

        # predictions.shape == (batch_size, seq_len, vocab_size)
        def symbols_to_logits(output):
            batched_input = tf.tile(encoder_input, [beam_width, 1])
            enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
                batched_input, output)
            predictions, attention_weights = transformer2(
                batched_input, output, False, enc_padding_mask, combined_mask,
                dec_padding_mask)
            predictions = predictions[:, -1, :]

            return predictions

        finished_seq, finished_scores, states = beam_search(
            symbols_to_logits,
            output,
            beam_width,
            MAX_LENGTH,
            target_vocab_size,
            alpha,
            states=None,
            eos_id=tokenizer_de.vocab_size + 1,
            stop_early=True,
            use_tpu=False,
            use_top_k_with_unique=True)

        return finished_seq[0, 0, :]

    def translate(sentence):
        result = predict(sentence)
        predicted_sentence = tokenizer_de.decode(
            [i for i in result if i < tokenizer_de.vocab_size])

        print('Input: {}'.format(sentence))
        print('Predicted translation: {}'.format(predicted_sentence))
        return predicted_sentence

    translations = []
    inputs = []
    targets = []
    BLEUs = []
    i = 0
    for sentence in train_examples4backtrans:
        # eng-> deu : hence indexes reversed
        inp = sentence[1].numpy().decode('utf-8')
        target = sentence[0].numpy().decode('utf-8')
        translation = translate(inp)
        BLEU = nltk.translate.bleu_score.sentence_bleu(
            [nltk.word_tokenize(target)], nltk.word_tokenize(translation))
        translations.append(translation)
        inputs.append(inp)
        BLEUs.append(BLEU)
        print('Average BLEU score: ', 100 * np.mean(BLEUs))
        targets.append(target)
        # i+=1
        # store backtrans every 800 sentences
        # if i % 800 == 0:
        #     d = {'input': inputs, 'target': targets, 'translation': translations, 'BLEU': BLEUs}
        #     df = pd.DataFrame.from_dict(d)
        #     df.to_csv(os.path.join(output_path, 'results_backtrans_' + experiment_name + '_interm_'+str(i)+'.csv'))

    d = {
        'input': inputs,
        'target': targets,
        'translation': translations,
        'BLEU': BLEUs
    }
    df = pd.DataFrame.from_dict(d)
    df.to_csv(
        os.path.join(output_path,
                     'results_backtrans_' + experiment_name + '.csv'))

    print('Average BLEU score: ', 100 * np.mean(BLEUs))
lr_scheduler = LRSchedulerPerStep(
    d_model, 4000)  # there is a warning that this is slow; it is fine in practice
# lr_scheduler = LRSchedulerPerEpoch(d_model, 4000, Xtrain.shape[0]/64)  # this scheduler only update lr per epoch
model_saver = ModelCheckpoint(mfile,
                              save_best_only=True,
                              save_weights_only=True)
###########################################

if 'sparse' in sys.argv:

    initParams = initSparseWeights(epsilon,
                                   n_head=n_head,
                                   d_k=d_k,
                                   d_v=d_v,
                                   layers=layers)
    s2s = Transformer(itokens, otokens, len_limit=len_limit, d_model=d_model, d_inner_hid=d_inner_hid, \
       n_head=n_head, d_k=d_k, d_v=d_v, layers=layers, dropout=dropout, weightsForSparsity=initParams)
    s2s.compile(adam)
    s2s.model.summary()

    if 'load_existing_model' in sys.argv:
        s2s.model.summary()
        try:
            s2s.model.load_weights(mfile)
        except:
            print('\n\nnew model')
    else:
        print('*** New model ***')

    for epoch in range(0, maxepoches):
        print('epoch #' + str(epoch))
Example #30
descriptions = df['description'].tolist()

FT = FilteredTokenizer()
Tokens = FT.filter_and_tokenize(descriptions, mode=TOKEN_FILTERS, tokenizer=TOKENIZER, filter_fpath=CUSTOM_FILTER_PATH)

WordEmbedding_ = WordEmbedding()
WordEmbedding_.load()

print("====== Examples of things you can do with the embeddings =======")
print(WordEmbedding_.word_vectors.most_similar(positive=['woman', 'king'], negative=['man']))
print(WordEmbedding_.word_vectors.most_similar("dont"))
print(WordEmbedding_.word_vectors.most_similar("a"))

matched_tokens, unmatched_tokens = WordEmbedding_.check_embedding_coverage(list_tokens=Tokens, verbose=True)
# Then you will get a file named <embedding file name> + <date time> + unmatched tokens
# this is a file with count distinct unmatched tokens, sorted in descending order

# Then you are able to see these attributes:
print("WordEmbedding_.coverage", WordEmbedding_.coverage)
# print("WordEmbedding_.wordvec_map", WordEmbedding_.wordvec_map)
print("You can get a word vector of the word 'hello' by calling: WordEmbedding_.word_vectors.get_vector('hello')", 
	  WordEmbedding_.word_vectors.get_vector('hello'))

T = Transformer(WordEmbedding_.wordvec_map)

# Convert the 'points' scores into one-hot vectors over the categories (intervals) defined by us.
# You can change these settings in the config.
y = df['points'].tolist()
X, y = T.fit_transform(Tokens, y,
                       drop_long_sentences=DROP_LONG_SENTENCES,
                       drop_short_sentences=DROP_SHORT_SENTENCES,
                       num2cat_=CONVERT_Y,
                       intervals=Y_CAT_INTERVALS)
print("X.shape, y.shape ", X.shape, y.shape)