Code example #1
    def __init__(self, args):
        # get the dir with pre-trained model

        load_dir = os.path.join(args.experiment_dir, args.old_model_dir)

        # initialize, and load vocab
        self.vocab = Vocab()
        vocab_filename = os.path.join(load_dir, "vocab.json")
        self.vocab.load_from_dict(vocab_filename)

        # load configuration
        with open(os.path.join(load_dir, "config.json"), "r") as f:
            config = json.load(f)

        args.response_len = config["response_len"]
        args.history_len = config["history_len"]

        # initialize an empty dataset. used to get input features
        self.dataset = DialogueDataset(None,
                                       history_len=config["history_len"],
                                       response_len=config["response_len"],
                                       vocab=self.vocab,
                                       update_vocab=False)

        # set device
        self.device = torch.device(args.device)

        # initialize model
        model = Transformer(config["vocab_size"],
                            config["vocab_size"],
                            config["history_len"],
                            config["response_len"],
                            d_word_vec=config["embedding_dim"],
                            d_model=config["model_dim"],
                            d_inner=config["inner_dim"],
                            n_layers=config["num_layers"],
                            n_head=config["num_heads"],
                            d_k=config["dim_k"],
                            d_v=config["dim_v"],
                            dropout=config["dropout"],
                            pretrained_embeddings=None).to(self.device)

        # load checkpoint
        checkpoint = torch.load(os.path.join(load_dir, args.old_model_name),
                                map_location=self.device)
        model.load_state_dict(checkpoint['model'])

        # create chatbot
        self.chatbot = Chatbot(args, model)

        self.args = args
Code example #2
class TestTransformer(unittest.TestCase):
    def setUp(self):
        self.transformer = Transformer()
        self.src_json = "/opt/repos/plan-test/test/in_files/fever_test_events.json"
        self.src_csv = "/opt/repos/plan-test/test/in_files/fever_plans.csv"

    def test_create_events_info_df(self):
        res = self.transformer.create_events_info_df_from_file(self.src_json)
        self.assertEqual(42418, len(res))
Code example #3
    def __init__(self, app_name='Base Widget'):
        super(BaseWidget, self).__init__()
        self.app_name = app_name
        # main window
        self.setGeometry(50, 50, 200, 200)
        self.setWindowTitle(app_name)
        # enable dragging and dropping onto the GUI
        self.setAcceptDrops(True)
        self.transformer = Transformer()
Code example #4
File: controller.py  Project: nordev/plan-test
    def run(self):
        # TODO: Uncomment to uncompress and move the compressed events json file
        # self._uncompress_events()

        # TODO: Uncomment to delete the compressed events json file
        # os.remove(src_gz_file)

        extractor = Extractor(self.src_db_config)
        df_plans = extractor.export_table_to_df(self.table_plans)

        transformer = Transformer()
        df_events = transformer.create_events_info_df_from_file(
            self.src_events)
        df_purchase = transformer.create_purchase_detail_df_from_df(
            df_events, df_plans)

        loader = Loader(db_name='test_fever')

        events_json = df_events.to_dict(orient="records")
        # loader.delete_many(collection_name=self.mongo_db_events, json_query={}) # Test

        try:
            loader.insert_many(collection_name=self.mongo_db_events,
                               json_list=events_json)
        except pymongo.errors.BulkWriteError:
            loader.upsert_many_one_by_one(collection_name=self.mongo_db_events,
                                          json_list=events_json)

        purchase_json = df_purchase.to_dict(orient="records")

        # loader.delete_many(collection_name=self.mongo_db_purchase, json_query={}) # Test
        try:
            loader.insert_many(collection_name=self.mongo_db_purchase,
                               json_list=purchase_json)
        except pymongo.errors.BulkWriteError:
            loader.upsert_many_one_by_one(
                collection_name=self.mongo_db_purchase,
                json_list=purchase_json)
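The run() method above leaves the decompression step as a TODO. A minimal sketch of a standalone helper that could back self._uncompress_events(), assuming the compressed file sits next to the events file with a ".gz" suffix (the gzip format, the path convention, and the helper name are assumptions for illustration, not part of the project):

import gzip
import shutil

def uncompress_events(src_events_path):
    # Hypothetical helper: decompress "<src_events_path>.gz" into src_events_path.
    # The .gz naming convention is an assumption, not taken from the project.
    src_gz_file = src_events_path + ".gz"
    with gzip.open(src_gz_file, "rb") as f_in, open(src_events_path, "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)
    return src_gz_file

The returned path could then be handed to os.remove, matching the second TODO in the snippet.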
Code example #5
def main():
    flags = TransformerConfig(
        nheads=8,
        model_dim=512,
        hidden_dim=2048,
        depth=6,
        epochs=10,
        train_batch_size=32,
        eval_batch_size=32,
    )

    torch.manual_seed(flags.random_seed)
    torch.cuda.manual_seed(flags.random_seed)

    train_dataset = TextDataset(
        path_root='../../ml-datasets/wmt14/',
        path_src="train.en",
        path_tgt="train.de",
        path_tokenizer='tokenizer/',
    )

    eval_dataset = TextDataset(
        path_root='../../ml-datasets/wmt14/',
        path_src="newstest2014.en",
        path_tgt="newstest2014.de",
        path_tokenizer='tokenizer/',
    )

    vocab_size = train_dataset.tokenizer.get_vocab_size()
    max_len = max(train_dataset.max_len, eval_dataset.max_len)
    model = Transformer(
        vocab_size=vocab_size,
        model_dim=flags.model_dim,
        hidden_dim=flags.hidden_dim,
        nheads=flags.nheads,
        max_len=max_len,
        depth=flags.depth,
    )

    train_op = Trainer(
        flags=flags,
        model=model,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        tb_writer=SummaryWriter(),
        vocab_size=vocab_size,
    )
    train_op.fit()
Code example #6
    def setUp(self):
        flags = TransformerConfig(
            nheads=2,
            model_dim=10,
            hidden_dim=10,
            depth=2,
            epochs=1,
            train_batch_size=64,
        )

        train_dataset = TextDataset(
            path_root='../../ml-datasets/wmt14/',
            path_src="newstest2014.en",
            path_tgt="newstest2014.de",
            path_tokenizer='tokenizer/',
        )

        eval_dataset = TextDataset(
            path_root='../../ml-datasets/wmt14/',
            path_src="newstest2014.en",
            path_tgt="newstest2014.de",
            path_tokenizer='tokenizer/',
        )

        self.vocab_size = train_dataset.tokenizer.get_vocab_size()
        max_len = max(train_dataset.max_len, eval_dataset.max_len)
        model = Transformer(
            vocab_size=self.vocab_size,
            model_dim=flags.model_dim,
            hidden_dim=flags.hidden_dim,
            nheads=flags.nheads,
            max_len=max_len,
            depth=flags.depth,
        )

        self.train_op = Trainer(
            flags=flags,
            model=model,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            tb_writer=None,
            vocab_size=self.vocab_size,
        )
        self.tokenizer = self.train_op.train_dataset.tokenizer
Code example #7
File: train.py  Project: SaiSakethAluru/SeqGen
def train(args):
    if args.device == 'cuda' and torch.cuda.is_available():
        device = torch.device('cuda')
        print("using gpu: ", torch.cuda.get_device_name(torch.cuda.current_device()))
        
    else:
        device = torch.device('cpu')
        print('using cpu')
    
    if args.dataset_name == 'pubmed':
        LABEL_LIST = PUBMED_LABEL_LIST
    elif args.dataset_name == 'nicta':
        LABEL_LIST = NICTA_LABEL_LIST
    elif args.dataset_name == 'csabstract':
        LABEL_LIST = CSABSTRACT_LABEL_LIST
    else:
        raise ValueError(f"Unsupported dataset_name: {args.dataset_name}")

    train_x,train_labels = load_data(args.train_data, args.max_par_len,LABEL_LIST)
    dev_x,dev_labels = load_data(args.dev_data, args.max_par_len,LABEL_LIST)
    test_x,test_labels = load_data(args.test_data, args.max_par_len,LABEL_LIST)

    tokenizer = AutoTokenizer.from_pretrained(args.bert_model)
    train_x = tokenize_and_pad(train_x,tokenizer,args.max_par_len,args.max_seq_len, LABEL_LIST)  ## N, par_len, seq_len
    dev_x = tokenize_and_pad(dev_x,tokenizer,args.max_par_len, args.max_seq_len, LABEL_LIST)
    test_x = tokenize_and_pad(test_x,tokenizer, args.max_par_len, args.max_seq_len, LABEL_LIST)

    training_params = {
        "batch_size": args.batch_size,
        "shuffle": True,
        "drop_last": False
        }
    dev_params = {
        "batch_size": args.batch_size,
        "shuffle": False,
        "drop_last": False
        }
    test_params = {
        "batch_size": args.batch_size,
        "shuffle": False,
        "drop_last": False
        }

    print('train.py train_x.shape:',train_x.shape,'train_labels.shape',train_labels.shape)
    training_generator = return_dataloader(inputs=train_x, labels=train_labels, params=training_params)
    dev_generator = return_dataloader(inputs=dev_x, labels=dev_labels, params=dev_params)
    test_generator = return_dataloader(inputs=test_x, labels=test_labels, params=test_params)   

    src_pad_idx = 0
    trg_pad_idx = 0
    model = Transformer(
        label_list=LABEL_LIST,
        src_pad_idx=src_pad_idx,
        trg_pad_idx=trg_pad_idx,
        embed_size=args.embed_size,
        num_layers=args.num_layers,   ## debug
        forward_expansion=args.forward_expansion,
        heads=len(LABEL_LIST),
        dropout=0.1,
        device=device,
        max_par_len=args.max_par_len,
        max_seq_len=args.max_seq_len,
        bert_model=args.bert_model
    )
    model = model.to(device).float()
    
    criterion = nn.CrossEntropyLoss(ignore_index=trg_pad_idx)
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr)
    
    epoch_losses = []
    best_val_loss = float('inf')
    for epoch in range(args.num_epochs):
        model.train()
        print(f"----------------[Epoch {epoch} / {args.num_epochs}]-----------------------")

        losses = []
        for batch_idx,batch in tqdm(enumerate(training_generator)):
            inp_data,target = batch
            inp_data = inp_data.to(device)
            target = target.to(device)

            ## For CRF
            optimizer.zero_grad()

            loss = -model(inp_data.long(),target[:,1:], training=True)       ## directly gives loss when training = True


            losses.append(loss.item())

            loss.backward()

            optimizer.step()
            
        mean_loss = sum(losses)/len(losses)

        print(f"Mean loss for epoch {epoch} is {mean_loss}")
        # Validation
        model.eval()
        val_targets = []
        val_preds = []
        for batch_idx,batch in tqdm(enumerate(dev_generator)):
            inp_data,target = batch
            inp_data = inp_data.to(device)
            target = target.to(device)
            with torch.no_grad():
                output = model(inp_data,target[:,:-1], training=False)      ## with training=False the model returns decoded labels, not logits

            flattened_target = target[:,1:].to('cpu').flatten()
            output = convert_crf_output_to_tensor(output,args.max_par_len)
            flattened_preds = output.to('cpu').flatten()
            for target_i,pred_i in zip(flattened_target,flattened_preds):
                if target_i != 0:
                    val_targets.append(target_i)
                    val_preds.append(pred_i)

        f1 = f1_score(val_targets,val_preds,average='micro')
        
        print(f'------Micro F1 score on dev set: {f1}------')

        # Note: no separate validation loss is computed above, so the epoch's
        # mean training loss is used as the checkpointing criterion.
        if mean_loss < best_val_loss:
            print(f"mean loss lower than previous best of {best_val_loss}")
            best_val_loss = mean_loss
            if args.save_model:
                dir_name = f"seed_{args.seed}_parlen_{args.max_par_len}_seqlen_{args.max_seq_len}_lr_{args.lr}.pt"
                output_path = os.path.join(args.save_path,dir_name)
                if not os.path.exists(args.save_path):
                    os.makedirs(args.save_path)
                print(f"Saving model to path {output_path}")
                torch.save(model,output_path)

        # Testing
        if epoch % args.test_interval == 0:
            model.eval()
            test_targets = []
            test_preds = []
            for batch_idx, batch in tqdm(enumerate(test_generator)):
                inp_data,target = batch
                inp_data = inp_data.to(device)
                target = target.to(device)
                with torch.no_grad():
                    output = model(inp_data,target[:,:-1],training=False)
                    
                flattened_target = target[:,1:].to('cpu').flatten()
                output = convert_crf_output_to_tensor(output,args.max_par_len)
                flattened_preds = output.to('cpu').flatten()
                for target_i,pred_i in zip(flattened_target,flattened_preds):
                    if target_i!=0:
                        test_targets.append(target_i)
                        test_preds.append(pred_i)
            
            f1 = f1_score(test_targets,test_preds,average='micro')
            print(f"------Micro F1 score on test set: {f1}------")
Code example #8
File: test.py  Project: SaiSakethAluru/BERT-SeqGen
embed_size = 8
num_layers = 5
heads = 4
device = torch.device('cuda') if torch.cuda.is_available() else torch.device(
    'cpu')
forward_expansion = 4
dropout = 0.8
max_length = 10
labels = torch.tensor([1, 2, 3, 4]).to(device)
batch_size = 2
par_len = 3
# model = SentenceEncoder(src_vocab_size,embed_size,num_layers,heads,device,forward_expansion,dropout,max_length,labels)
src_pad_idx = 0
trg_pad_idx = 0
model = Transformer(src_vocab_size, len(labels), src_pad_idx, trg_pad_idx,
                    labels, embed_size, num_layers, forward_expansion, heads,
                    dropout, device, max_length)

# x = torch.tensor([[[1,5,6,3,2,4,2,4,1,0],[1,3,2,4,3,2,5,0,0,0],]]).to(device)
x = torch.randint(0, 10, size=(batch_size, par_len, max_length)).to(device)
# trg = torch.tensor([1]).expand(batch_size,-1).to(device)
trg = torch.randint(0, len(labels), size=(batch_size, par_len)).to(device)
# pad_idx = 0
# mask = (x != pad_idx).to(device)
print(x.shape)
output = model(x, trg)

print(output)
# print(word_lev_outputs)
print("output.shape", output.shape)
# print("word_lev_outputs.shape", word_lev_outputs.shape)
Code example #9
            padding_values=train_dataset.padding_values),
    )

    test_dataset = SMILESDataset(test, vocab, mean_log_p, std_log_p)
    test_loader = DataLoader(
        test_dataset,
        batch_size=args.batch_size,
        collate_fn=make_collate_fn(
            padding_values=train_dataset.padding_values),
    )

    model = Transformer(
        vocab_size=train_dataset.vocab_size,
        dmodel=512,  # 512
        nhead=8,
        decoder_layers=6,  # 6
        dim_feedforward=1024,  # 1024
        dropout=0.1,
        num_positions=1024,
        n_conditional_channels=1,
    )
    model.to(device)

    optimizer = configure_optimizer(model.named_parameters(), args.lr)
    steps_per_epoch = len(train_dataset) / (args.batch_size *
                                            args.accumulation)
    scheduler = configure_scheduler(
        optimizer,
        training_steps=(args.epochs * steps_per_epoch),
        warmup=args.warmup * steps_per_epoch,
    )
    criterion = torch.nn.CrossEntropyLoss(reduction="none")
Code example #10
    def setUp(self):
        self.transformer = Transformer()
        self.src_json = "/opt/repos/plan-test/test/in_files/fever_test_events.json"
        self.src_csv = "/opt/repos/plan-test/test/in_files/fever_plans.csv"
Code example #11
File: answer_pct_user.py  Project: nordev/plan-test
# This script aims to answer the question:
# What percentage of users that purchase in the tasting category have
# performed a search in the app prior to the purchase event?

import numpy as np
import pandas as pd
from src.transformer import Transformer

src_csv = "/opt/repos/plan-test/src/in/fever_plans.csv"
src_json = "/opt/repos/plan-test/src/in/fever_test_events.json"

df_plans = pd.read_csv(src_csv, dtype={'id': np.int64}, encoding='utf-8')
df_events = Transformer().create_events_info_df_from_file(src_json)

# Select only events with plan_id
df_events_fk = df_events.loc[df_events['plan_id'].notna()].copy()

# Convert plan_id back to int64; it was read as object dtype because of the NaN values
df_events_fk['plan_id'] = df_events_fk['plan_id'].astype(np.int64)

# Left join of dfs on primary key plan id.
df_merged = df_plans.merge(df_events_fk, left_on='id', right_on='plan_id', how='left')

user_purchase = df_merged.loc[
    (df_merged['category'] == 'tasting') & (df_merged['event'] == 'purchase')].reset_index()

# Get max date of every purchase by user
user_purchase_max = user_purchase[
    user_purchase.groupby('user_id').time.transform('max') == user_purchase['time']]

# Which users in user_purchase_max performed a search before that purchase date?
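The script stops before that final comparison. A sketch of how the last step could be written with the columns already present in df_merged, assuming search events are recorded with event == 'search' (that event name is an assumption; the column names come from the snippet above):

# Hypothetical continuation: users with at least one search before their last
# tasting purchase, as a percentage of all tasting purchasers.
user_search = df_merged.loc[df_merged['event'] == 'search', ['user_id', 'time']]

purchase_vs_search = user_purchase_max[['user_id', 'time']].merge(
    user_search, on='user_id', how='left', suffixes=('_purchase', '_search'))
searched_before = purchase_vs_search.loc[
    purchase_vs_search['time_search'] < purchase_vs_search['time_purchase'],
    'user_id'].nunique()

total_purchasers = user_purchase_max['user_id'].nunique()
print("pct of tasting purchasers who searched first:",
      100.0 * searched_before / total_purchasers)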
Code example #12
File: train.py  Project: 786440445/transformer_asr
def main(args):
    # load dictionary and generate char_list, sos_id, eos_id
    char_list, sos_id, eos_id = process_dict(args.dict)
    vocab_size = len(char_list)
    tr_dataset = AudioDataset('train', args.batch_size)
    cv_dataset = AudioDataset('dev', args.batch_size)

    tr_loader = AudioDataLoader(tr_dataset,
                                batch_size=1,
                                num_workers=args.num_workers,
                                shuffle=args.shuffle,
                                feature_dim=args.feature_dim,
                                char_list=char_list,
                                path_list=tr_dataset.path_lst,
                                label_list=tr_dataset.han_lst,
                                LFR_m=args.LFR_m,
                                LFR_n=args.LFR_n)
    cv_loader = AudioDataLoader(cv_dataset,
                                batch_size=1,
                                num_workers=args.num_workers,
                                feature_dim=args.feature_dim,
                                char_list=char_list,
                                path_list=cv_dataset.path_lst,
                                label_list=cv_dataset.han_lst,
                                LFR_m=args.LFR_m,
                                LFR_n=args.LFR_n)

    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}

    encoder = Encoder(args.d_input * args.LFR_m,
                      args.d_low_dim,
                      args.n_layers_enc,
                      args.n_head,
                      args.d_k,
                      args.d_v,
                      args.d_model,
                      args.d_inner,
                      dropout=args.dropout,
                      pe_maxlen=args.pe_maxlen)
    decoder = Decoder(
        sos_id,
        eos_id,
        vocab_size,
        args.d_word_vec,
        args.n_layers_dec,
        args.n_head,
        args.d_k,
        args.d_v,
        args.d_model,
        args.d_inner,
        dropout=args.dropout,
        tgt_emb_prj_weight_sharing=args.tgt_emb_prj_weight_sharing,
        pe_maxlen=args.pe_maxlen)
    model = Transformer(encoder, decoder)
    print(model)
    model.cuda()
    # optimizer
    optimizer = TransformerOptimizer(
        torch.optim.Adam(model.parameters(), betas=(0.9, 0.98), eps=1e-09),
        args.init_lr, args.d_model, args.warmup_steps)

    # solver
    solver = Solver(data, model, optimizer, args)
    solver.train()
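TransformerOptimizer above wraps Adam with a warmup schedule; its internals are not shown in this snippet. For reference, a minimal sketch of the standard "Noam" learning-rate schedule from the original Transformer paper, which warmup wrappers of this kind typically implement (the class below is illustrative only, not the project's actual TransformerOptimizer):

class NoamSchedule:
    """lr = init_lr * d_model**-0.5 * min(step**-0.5, step * warmup_steps**-1.5)"""

    def __init__(self, optimizer, init_lr, d_model, warmup_steps):
        self.optimizer = optimizer
        self.init_lr = init_lr
        self.d_model = d_model
        self.warmup_steps = warmup_steps
        self.step_num = 0

    def step(self):
        # Increase the learning rate linearly during warmup, then decay with step**-0.5.
        self.step_num += 1
        lr = self.init_lr * self.d_model ** -0.5 * min(
            self.step_num ** -0.5, self.step_num * self.warmup_steps ** -1.5)
        for group in self.optimizer.param_groups:
            group['lr'] = lr
        self.optimizer.step()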
Code example #13
    train, test_val = train_test_split(df, test_size=0.1)
    # test, val = train_test_split(test_val, test_size=0.5)

    vocab = create_vocab(df)
    print("Vocab size: ", len(vocab))

    train_dataset = SMILESDataset(train, vocab, mean_log_p, std_log_p)
    print("With special tokens", train_dataset.vocab_size)
    print(train_dataset.char_to_idx)

    state_dict = torch.load(args.model, map_location=device)
    model = Transformer(
        vocab_size=36,
        dmodel=512,  # 512
        nhead=8,
        decoder_layers=6,  # 6
        dim_feedforward=1024,  # 1024
        dropout=0.1,
        num_positions=1024,
        n_conditional_channels=1,
    )

    n_iters = args.n_molecules // args.batch_size

    model.load_state_dict(state_dict)
    model.eval()
    model.to(device)
    generated_molecules = []
    corresponding_log_p = []
    start = time.time()
    for it in tqdm(range(n_iters), total=n_iters, ncols=80):
        sample_log_p, _ = gmm.sample(args.batch_size)
Code example #14
File: train.py  Project: SaiSakethAluru/BERT-SeqGen
def train(args):
    if args.device == 'cuda' and torch.cuda.is_available():
        device = torch.device('cuda')
        print("using gpu: ", torch.cuda.get_device_name(torch.cuda.current_device()))
        
    else:
        device = torch.device('cpu')
        print('using cpu')
    
    if args.dataset_name == 'pubmed':
        LABEL_LIST = PUBMED_LABEL_LIST
    elif args.dataset_name == 'nicta':
        LABEL_LIST = NICTA_LABEL_LIST
    elif args.dataset_name == 'csabstract':
        LABEL_LIST = CSABSTRACT_LABEL_LIST
    else:
        raise ValueError(f"Unsupported dataset_name: {args.dataset_name}")

    train_x,train_labels = load_data(args.train_data, args.max_par_len,LABEL_LIST)
    dev_x,dev_labels = load_data(args.dev_data, args.max_par_len,LABEL_LIST)
    test_x,test_labels = load_data(args.test_data, args.max_par_len,LABEL_LIST)

    tokenizer = AutoTokenizer.from_pretrained(args.bert_model)
    train_x = tokenize_and_pad(train_x,tokenizer,args.max_par_len,args.max_seq_len, LABEL_LIST)  ## N, par_len, seq_len
    dev_x = tokenize_and_pad(dev_x,tokenizer,args.max_par_len, args.max_seq_len, LABEL_LIST)
    test_x = tokenize_and_pad(test_x,tokenizer, args.max_par_len, args.max_seq_len, LABEL_LIST)

    # print('train_x[0]',train_x[0])
    # print('train_x[0].shape',train_x[0].shape)
    # quit()
    training_params = {
        "batch_size": args.batch_size,
        "shuffle": True,
        "drop_last": False
        }
    dev_params = {
        "batch_size": args.batch_size,
        "shuffle": False,
        "drop_last": False
        }
    test_params = {
        "batch_size": args.batch_size,
        "shuffle": False,
        "drop_last": False
        }

    print('train.py train_x.shape:',train_x.shape,'train_labels.shape',train_labels.shape)
    training_generator = return_dataloader(inputs=train_x, labels=train_labels, params=training_params)
    dev_generator = return_dataloader(inputs=dev_x, labels=dev_labels, params=dev_params)
    test_generator = return_dataloader(inputs=test_x, labels=test_labels, params=test_params)   

    src_pad_idx = 0
    trg_pad_idx = 0
    model = Transformer(
        label_list=LABEL_LIST,
        src_pad_idx=src_pad_idx,
        trg_pad_idx=trg_pad_idx,
        embed_size=args.embed_size,
        num_layers=args.num_layers,   ## debug
        forward_expansion=args.forward_expansion,
        heads=len(LABEL_LIST),
        dropout=0.1,
        device=device,
        max_par_len=args.max_par_len,
        max_seq_len=args.max_seq_len,
        bert_model=args.bert_model
    )
    model = model.to(device).float()
    # for param in model.parameters():
    #     try:
    #         torch.nn.init.xavier_uniform_(param)
    #     except:
    #         continue
    
    criterion = nn.CrossEntropyLoss(ignore_index=trg_pad_idx)
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    #     optimizer, factor=0.1, patience=10, verbose=True
    # )
    
    epoch_losses = []
    best_val_loss = float('inf')
    for epoch in range(args.num_epochs):
        model.train()
        print(f"----------------[Epoch {epoch} / {args.num_epochs}]-----------------------")

        losses = []
        for batch_idx,batch in tqdm(enumerate(training_generator)):
            # print('batch',batch)
            # print('type of batch',type(batch))
            inp_data,target = batch
            # print('inp_data',inp_data)
            # print('type(inp_data)',type(inp_data))
            # print('target',target)
            # print('type(target)',type(target))
            # print('target.shape',target.shape)
            inp_data = inp_data.to(device)
            # print('inp_data.shape',inp_data.shape)
            target = target.to(device)
            # assert False

            ## For generation
            # output = model(inp_data.long(),target[:,:-1], training=True)       ## N,par_len, label_size
            
            ## For CRF
            optimizer.zero_grad()

            # output = model(inp_data.long(),target[:,1:], training=True)       ## N,par_len, label_size
            loss = -model(inp_data.long(),target[:,1:], training=True)       ## directly gives loss when training = True


            # output = model(inp_data,target[:,:-1])

            # print('model net',make_dot(output))
            # print(make_dot(output))
            # make_arch = make_dot(output)
            # Source(make_arch).render('graph.png')
            # assert False
            ## output - N,par_len, num_labels --> N*par_len, num_labels
            # output = output.reshape(-1,output.shape[2])
            ## target -
            # target = target[:,1:].reshape(-1)

            # print('output.shape',output.shape)
            # print('target.shape',target.shape)
            # print(f'{epoch} model params', list(model.parameters())[-1])
            # print('len params',len(list(model.parameters())))
            # print('trainable params: ',len(list(filter(lambda p: p.requires_grad, model.parameters()))))

            # loss = criterion(output,target)
            # loss.retain_grad()
            losses.append(loss.item())

            # print(f'{epoch} loss grads before', list(loss.grad)[-1])
            loss.backward()
            # print(f'{epoch} loss grads after', loss.grad)
            # print('model params')
            # count = 0
            # for p in model.parameters():
            #     if p.grad is not None:
            #         print(p.grad,p.grad.norm())
            #         count +=1 
            # print(f'non none grads are {count}')
            # torch.nn.utils.clip_grad_norm_(model.parameters(),max_norm=1)

            optimizer.step()
            # break #NOTE: break is there only for quick checking. Remove this for actual training.
            
        mean_loss = sum(losses)/len(losses)
        # scheduler.step(mean_loss)

        print(f"Mean loss for epoch {epoch} is {mean_loss}")
        # Validation
        model.eval()
        # val_losses = []
        val_targets = []
        val_preds = []
        for batch_idx,batch in tqdm(enumerate(dev_generator)):
            inp_data,target = batch
            inp_data = inp_data.to(device)
            target = target.to(device)
            with torch.no_grad():
                output = model(inp_data,target[:,:-1], training=False)      ## with training=False the model returns decoded labels, not logits
                # reshaped_output = output.reshape(-1,output.shape[2])
                # reshaped_target = target[:,1:].reshape(-1)
                # loss = criterion(reshaped_output,reshaped_target).item()

            # val_losses.append(loss)
            flattened_target = target[:,1:].to('cpu').flatten()
            # print(output)
            output = convert_crf_output_to_tensor(output,args.max_par_len)
            # flattened_preds = torch.softmax(output,dim=-1).argmax(dim=-1).to('cpu').flatten()
            flattened_preds = output.to('cpu').flatten()
            for target_i,pred_i in zip(flattened_target,flattened_preds):
                if target_i != 0:
                    val_targets.append(target_i)
                    val_preds.append(pred_i)
            # val_targets.append(target[:,1:].to('cpu').flatten())
            # output = torch.softmax(output,dim=-1).argmax(dim=-1)
            # val_preds.append(output.to('cpu').flatten())
            # break #NOTE: break is there only for quick checking. Remove this for actual training.

        # loss = sum(val_losses) / len(val_losses)
        # print(f"Validation loss at epoch {epoch} is {loss}")
        # val_targets = torch.cat(val_targets,dim=0)
        # val_preds = torch.cat(val_preds,dim=0)
        f1 = f1_score(val_targets,val_preds,average='micro')
        
        print(f'------Micro F1 score on dev set: {f1}------')

        # if loss < best_val_loss:
        #     print(f"val loss less than previous best val loss of {best_val_loss}")
        #     best_val_loss = loss
        #     if args.save_model:
        #         dir_name = f"seed_{args.seed}_parlen_{args.max_par_len}_seqlen_{args.max_seq_len}_lr_{args.lr}.pt"
        #         output_path = os.path.join(args.save_path,dir_name)
        #         if not os.path.exists(args.save_path):
        #             os.makedirs(args.save_path)
        #         print(f"Saving model to path {output_path}")
        #         torch.save(model,output_path)

        # Testing
        if epoch % args.test_interval == 0:
            model.eval()
            test_targets = []
            test_preds = []
            for batch_idx, batch in tqdm(enumerate(test_generator)):
                inp_data,target = batch
                inp_data = inp_data.to(device)
                target = target.to(device)
                with torch.no_grad():
                    output = model(inp_data,target[:,:-1],training=False)
                    
                # output = torch.softmax(output,dim=-1).argmax(dim=-1)
                flattened_target = target[:,1:].to('cpu').flatten()
                output = convert_crf_output_to_tensor(output,args.max_par_len)
                flattened_preds = output.to('cpu').flatten()
                for target_i,pred_i in zip(flattened_target,flattened_preds):
                    if target_i!=0:
                        test_targets.append(target_i)
                        test_preds.append(pred_i)
                # test_targets.append(target[:,1:].to('cpu').flatten())
                # test_preds.append(output.to('cpu').flatten())
                # break  #NOTE: break is there only for quick checking. Remove this for actual training. 
            
            # test_targets = torch.cat(test_targets,dim=0)
            # test_preds = torch.cat(test_preds,dim=0)
            # f1 = f1_score(target[:,1:].to('cpu').flatten(),output.to('cpu').flatten(),average='macro')
            f1 = f1_score(test_targets,test_preds,average='micro')
            print(f"------Micro F1 score on test set: {f1}------")

    ## Uncomment for generating attention vectors. 
    # Look into src/word_level_labelatt.py for details of computing and storing these attention scores
    # Look into src/selfatt.py for sentence level attention scores
    att_x = train_x[:10,:,:].to(device)
    att_y = train_labels[:10,:].to(device)[:,:-1] 
    model(att_x,att_y,training=False,att_heat_map=True)    
Code example #15
    def __init__(self, args):

        # set up output directory
        self.output_dir = os.path.join(args.experiment_dir, args.run_name)
        if not os.path.exists(args.experiment_dir):
            os.mkdir(args.experiment_dir)
        if not os.path.exists(self.output_dir):
            os.mkdir(self.output_dir)
        if not os.path.exists(os.path.join(args.experiment_dir,"runs/")):
            os.mkdir(os.path.join(args.experiment_dir,"runs/"))

        # initialize tensorboard writer
        self.runs_dir = os.path.join(args.experiment_dir,"runs/",args.run_name)
        self.writer = SummaryWriter(self.runs_dir)

        # initialize global steps
        self.train_gs = 0
        self.val_gs = 0

        # initialize model config
        self.config = ModelConfig(args)

        # check if there is a model to load
        if args.old_model_dir is not None:
            self.use_old_model = True
            self.load_dir = args.old_model_dir
            self.config.load_from_file(
                os.path.join(self.load_dir, "config.json"))

            # create vocab
            self.vocab = Vocab()
            self.vocab.load_from_dict(os.path.join(self.load_dir, "vocab.json"))
            self.update_vocab = False
            self.config.min_count=1
        else:
            self.use_old_model = False

            self.vocab = None
            self.update_vocab = True

        # create data sets
        self.dataset_filename = args.dataset_filename

        # train
        self.train_dataset = DialogueDataset(
            os.path.join(self.dataset_filename, "train.csv"),
            self.config.history_len,
            self.config.response_len,
            self.vocab,
            self.update_vocab)
        self.data_loader_train = torch.utils.data.DataLoader(
            self.train_dataset, self.config.train_batch_size, shuffle=True)
        self.config.train_len = len(self.train_dataset)

        self.vocab = self.train_dataset.vocab

        # eval
        self.val_dataset = DialogueDataset(
            os.path.join(self.dataset_filename, "val.csv"),
            self.config.history_len,
            self.config.response_len,
            self.vocab,
            self.update_vocab)
        self.data_loader_val = torch.utils.data.DataLoader(
            self.val_dataset, self.config.val_batch_size, shuffle=True)
        self.config.val_len = len(self.val_dataset)

        # update, and save vocab
        self.vocab = self.val_dataset.vocab
        self.train_dataset.vocab = self.vocab
        if (self.config.min_count > 1):
            self.config.old_vocab_size = len(self.vocab)
            self.vocab.prune_vocab(self.config.min_count)
        self.vocab.save_to_dict(os.path.join(self.output_dir, "vocab.json"))
        self.vocab_size = len(self.vocab)
        self.config.vocab_size = self.vocab_size

        # print and save the config file
        self.config.print_config(self.writer)
        self.config.save_config(os.path.join(self.output_dir, "config.json"))

        # set device
        self.device = torch.device('cuda')

        # create model
        self.model = Transformer(
            self.config.vocab_size,
            self.config.vocab_size,
            self.config.history_len,
            self.config.response_len,
            d_word_vec=self.config.embedding_dim,
            d_model=self.config.model_dim,
            d_inner=self.config.inner_dim,
            n_layers=self.config.num_layers,
            n_head=self.config.num_heads,
            d_k=self.config.dim_k,
            d_v=self.config.dim_v,
            dropout=self.config.dropout
        ).to(self.device)

        # create optimizer
        self.optimizer = torch.optim.Adam(
            filter(lambda x: x.requires_grad, self.model.parameters()),
            betas=(0.9, 0.98), eps=1e-09)

        # load old model, optimizer if there is one
        if self.use_old_model:
            self.model, self.optimizer = load_checkpoint(
                os.path.join(self.load_dir, "model.bin"),
                self.model, self.optimizer, self.device)


        # create a scheduled optimizer object
        self.optimizer = ScheduledOptim(
            self.optimizer, self.config.model_dim, self.config.warmup_steps)
Code example #16
class ModelOperator:
    def __init__(self, args):

        # set up output directory
        self.output_dir = os.path.join(args.experiment_dir, args.run_name)
        if not os.path.exists(args.experiment_dir):
            os.mkdir(args.experiment_dir)
        if not os.path.exists(self.output_dir):
            os.mkdir(self.output_dir)
        if not os.path.exists(os.path.join(args.experiment_dir,"runs/")):
            os.mkdir(os.path.join(args.experiment_dir,"runs/"))

        # initialize tensorboard writer
        self.runs_dir = os.path.join(args.experiment_dir,"runs/",args.run_name)
        self.writer = SummaryWriter(self.runs_dir)

        # initialize global steps
        self.train_gs = 0
        self.val_gs = 0

        # initialize model config
        self.config = ModelConfig(args)

        # check if there is a model to load
        if args.old_model_dir is not None:
            self.use_old_model = True
            self.load_dir = args.old_model_dir
            self.config.load_from_file(
                os.path.join(self.load_dir, "config.json"))

            # create vocab
            self.vocab = Vocab()
            self.vocab.load_from_dict(os.path.join(self.load_dir, "vocab.json"))
            self.update_vocab = False
            self.config.min_count=1
        else:
            self.use_old_model = False

            self.vocab = None
            self.update_vocab = True

        # create data sets
        self.dataset_filename = args.dataset_filename

        # train
        self.train_dataset = DialogueDataset(
            os.path.join(self.dataset_filename, "train.csv"),
            self.config.history_len,
            self.config.response_len,
            self.vocab,
            self.update_vocab)
        self.data_loader_train = torch.utils.data.DataLoader(
            self.train_dataset, self.config.train_batch_size, shuffle=True)
        self.config.train_len = len(self.train_dataset)

        self.vocab = self.train_dataset.vocab

        # eval
        self.val_dataset = DialogueDataset(
            os.path.join(self.dataset_filename, "val.csv"),
            self.config.history_len,
            self.config.response_len,
            self.vocab,
            self.update_vocab)
        self.data_loader_val = torch.utils.data.DataLoader(
            self.val_dataset, self.config.val_batch_size, shuffle=True)
        self.config.val_len = len(self.val_dataset)

        # update, and save vocab
        self.vocab = self.val_dataset.vocab
        self.train_dataset.vocab = self.vocab
        if (self.config.min_count > 1):
            self.config.old_vocab_size = len(self.vocab)
            self.vocab.prune_vocab(self.config.min_count)
        self.vocab.save_to_dict(os.path.join(self.output_dir, "vocab.json"))
        self.vocab_size = len(self.vocab)
        self.config.vocab_size = self.vocab_size

        # print and save the config file
        self.config.print_config(self.writer)
        self.config.save_config(os.path.join(self.output_dir, "config.json"))

        # set device
        self.device = torch.device('cuda')

        # create model
        self.model = Transformer(
            self.config.vocab_size,
            self.config.vocab_size,
            self.config.history_len,
            self.config.response_len,
            d_word_vec=self.config.embedding_dim,
            d_model=self.config.model_dim,
            d_inner=self.config.inner_dim,
            n_layers=self.config.num_layers,
            n_head=self.config.num_heads,
            d_k=self.config.dim_k,
            d_v=self.config.dim_v,
            dropout=self.config.dropout
        ).to(self.device)

        # create optimizer
        self.optimizer = torch.optim.Adam(
            filter(lambda x: x.requires_grad, self.model.parameters()),
            betas=(0.9, 0.98), eps=1e-09)

        # load old model, optimizer if there is one
        if self.use_old_model:
            self.model, self.optimizer = load_checkpoint(
                os.path.join(self.load_dir, "model.bin"),
                self.model, self.optimizer, self.device)


        # create a scheduled optimizer object
        self.optimizer = ScheduledOptim(
            self.optimizer, self.config.model_dim, self.config.warmup_steps)

        #self.optimizer.optimizer.to(torch.device('cpu'))


    def train(self, num_epochs):
        metrics = {"best_epoch": 0, "lowest_loss": float("inf")}

        # output an example
        #self.output_example(0)

        for epoch in range(num_epochs):
           # self.writer.add_graph(self.model)
            #self.writer.add_embedding(
            #    self.model.encoder.src_word_emb.weight, global_step=epoch)

            epoch_metrics = dict()

            # train
            epoch_metrics["train"] = self.execute_phase(epoch, "train")
            # save metrics
            metrics["epoch_{}".format(epoch)] = epoch_metrics
            with open(os.path.join(self.output_dir, "metrics.json"), "w") as f:
                json.dump(metrics, f, indent=4)

            # validate
            epoch_metrics["val"] = self.execute_phase(epoch, "val")
            # save metrics
            metrics["epoch_{}".format(epoch)] = epoch_metrics
            with open(os.path.join(self.output_dir, "metrics.json"), "w") as f:
                json.dump(metrics, f, indent=4)

            # save checkpoint
            #TODO: fix this b
            #if epoch_metrics["val"]["loss"] < metrics["lowest_loss"]:
            #if epoch_metrics["train"]["loss"] < metrics["lowest_loss"]:
            if epoch % 100 == 0:
                self.save_checkpoint(os.path.join(self.output_dir, "model_{}.bin".format(epoch)))
                #metrics["lowest_loss"] = epoch_metrics["train"]["loss"]
                #metrics["best_epoch"] = epoch

            # record metrics to tensorboard
            self.writer.add_scalar("training loss total",
                epoch_metrics["train"]["loss"], global_step=epoch)
            self.writer.add_scalar("val loss total",
                epoch_metrics["val"]["loss"], global_step=epoch)

            self.writer.add_scalar("training perplexity",
                epoch_metrics["train"]["perplexity"], global_step=epoch)
            self.writer.add_scalar("val perplexity",
                epoch_metrics["val"]["perplexity"], global_step=epoch)

            self.writer.add_scalar("training time",
                epoch_metrics["train"]["time_taken"], global_step=epoch)
            self.writer.add_scalar("val time",
                epoch_metrics["val"]["time_taken"], global_step=epoch)

            self.writer.add_scalar("train_bleu_1",
                epoch_metrics["train"]["bleu_1"], global_step=epoch)
            self.writer.add_scalar("val_bleu_1",
                epoch_metrics["val"]["bleu_1"], global_step=epoch)
            self.writer.add_scalar("train_bleu_2",
                epoch_metrics["train"]["bleu_2"], global_step=epoch)
            self.writer.add_scalar("val_bleu_2",
                epoch_metrics["val"]["bleu_2"], global_step=epoch)

            # output an example
            #self.output_example(epoch+1)

        self.writer.close()

    def execute_phase(self, epoch, phase):
        if phase == "train":
            self.model.train()
            dataloader = self.data_loader_train
            batch_size = self.config.train_batch_size
            train = True
        else:
            self.model.eval()
            dataloader = self.data_loader_val
            batch_size = self.config.val_batch_size
            train = False

        start = time.perf_counter()  # time.clock() was removed in Python 3.8
        phase_metrics = dict()
        epoch_loss = list()
        epoch_bleu_1 = list()
        epoch_bleu_2 = list()
        average_epoch_loss = None
        n_word_total = 0
        n_correct = 0
        n_word_correct = 0
        for i, batch in enumerate(tqdm(dataloader,
                          mininterval=2, desc=phase, leave=False)):
            # prepare data
            src_seq, src_pos, src_seg, tgt_seq, tgt_pos = map(
                lambda x: x.to(self.device), batch)

            gold = tgt_seq[:, 1:]

            # forward
            if train:
                self.optimizer.zero_grad()
            pred = self.model(src_seq, src_pos, src_seg, tgt_seq, tgt_pos)

            # get loss
            loss, n_correct = cal_performance(pred, gold,
                smoothing=self.config.label_smoothing)
            #average_loss = float(loss)/self.config.val_batch_size
            average_loss = float(loss)
            epoch_loss.append(average_loss)
            average_epoch_loss = np.mean(epoch_loss)

            if train:
                self.writer.add_scalar("train_loss",
                    average_loss, global_step=i + epoch * self.config.train_batch_size)
                # backward
                loss.backward()

                # update parameters
                self.optimizer.step_and_update_lr()

            # get_bleu
            output = torch.argmax(pred.view(-1, self.config.response_len-1, self.vocab_size), dim=2)
            epoch_bleu_1.append(bleu(gold, output, 1))
            epoch_bleu_2.append(bleu(gold, output, 2))

            # get_accuracy
            non_pad_mask = gold.ne(src.transformer.Constants.PAD)
            n_word = non_pad_mask.sum().item()
            n_word_total += n_word
            n_word_correct += n_correct


        phase_metrics["loss"] = average_epoch_loss
        phase_metrics["token_accuracy"] = n_correct / n_word_total

        perplexity = np.exp(average_epoch_loss)
        phase_metrics["perplexity"] = perplexity

        phase_metrics["bleu_1"] = np.mean(epoch_bleu_1)
        phase_metrics["bleu_2"] = np.mean(epoch_bleu_2)

        phase_metrics["time_taken"] = time.clock() - start
        string = ' {} loss: {:.3f} '.format(phase, average_epoch_loss)
        print(string, end='\n')
        return phase_metrics

    def save_checkpoint(self, filename):
        state = {
            'model': self.model.state_dict(),
            'optimizer': self.optimizer.optimizer.state_dict()
        }
        torch.save(state, filename)

    def output_example(self, epoch):
        random_index = random.randint(0, len(self.val_dataset) - 1)
        example = self.val_dataset[random_index]

        # prepare data
        src_seq, src_pos, src_seg, tgt_seq, tgt_pos = map(
            lambda x: torch.from_numpy(x).to(self.device).unsqueeze(0), example)

        # the gold target drops the first (start-of-sequence) token
        gold = tgt_seq[:, 1:]

        # forward
        pred = self.model(src_seq, src_pos, src_seg, tgt_seq, tgt_pos)
        output = torch.argmax(pred, dim=1)

        # get history text
        string = "history: "

        seg = -1
        for i, idx in enumerate(src_seg.squeeze()):
            if seg != idx.item():
                string+="\n"
                seg=idx.item()
            token = self.vocab.id2token[src_seq.squeeze()[i].item()]
            if token != '<blank>':
                string += "{} ".format(token)

        # get target text
        string += "\nTarget:\n"

        for idx in tgt_seq.squeeze():
            token = self.vocab.id2token[idx.item()]
            string += "{} ".format(token)

        # get prediction
        string += "\n\nPrediction:\n"

        for idx in output:
            token = self.vocab.id2token[idx.item()]
            string += "{} ".format(token)

        # print
        print("\n------------------------\n")
        print(string)
        print("\n------------------------\n")

        # add result to tensorboard
        self.writer.add_text("example_output", string, global_step=epoch)
        self.writer.add_histogram("example_vocab_ranking", pred, global_step=epoch)
        self.writer.add_histogram("example_vocab_choice", output,global_step=epoch)