Example #1
def main(mode, loss_function, hidden_layers, nheads, lr, dropout,
         regularization, weight_decay, n_epoch, save_every, word_emb_model,
         word_emb_dim, ner_model):
    # without regularization, zero out the optimizer's weight decay
    if not regularization:
        weight_decay = 0
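    # BIO tagging scheme: B- opens an entity span, I- continues it,
    # and O marks tokens outside any entity.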
    tag_to_idx = {
        "O": 0,
        "B-PERSON": 1,
        "I-PERSON": 2,
        "B-LOC": 3,
        "I-LOC": 4,
        "B-ORG": 5,
        "I-ORG": 6,
        START_TAG: 7,
        STOP_TAG: 8
    }

    print('Hyperparameters:')
    print("Loss function: {}".format(loss_function))
    print("Learning rate: {}", format(lr))
    print("Dropout: {}", format(dropout))
    if regularization == True:
        print("Weight Decay: {}", format(weight_decay))
    print("n Epochs: {}", format(n_epoch))

    word_emb = ""
    if word_emb_model == "fasttext":
        word_emb = fasttext.load_model('cc.id.300.bin')
        word_emb = fasttext.util.reduce_model(word_emb, word_emb_dim)
    if word_emb_model == "fasttext-indobert":
        word_emb = fasttext.load_model('fasttext.4B.id.300.epoch5.uncased.bin')
        word_emb = fasttext.util.reduce_model(word_emb, word_emb_dim)
    elif word_emb_model == "indobert":
        tokenizer = BertTokenizer.from_pretrained(
            "indobenchmark/indobert-base-p1")
        model = AutoModel.from_pretrained("indobenchmark/indobert-base-p1")
        # for IndoBERT, word_emb carries the (tokenizer, model) pair
        word_emb = (tokenizer, model)

    if mode == "train" or mode == "test" or mode == "all":
        train_dataset, valid_dataset, test_dataset = getData()
        print('Data loading ...')
        if mode == "train":
            #train_indobert(train_dataset, valid_dataset, tag_to_idx, DEVICE, dropout, hidden_layers, nheads, n_epoch, lr, regularization, word_emb_model, model, tokenizer, word_emb_dim)
            train(train_dataset, valid_dataset, tag_to_idx, DEVICE, dropout,
                  hidden_layers, nheads, n_epoch, lr, regularization,
                  word_emb_model, word_emb, word_emb_dim)
        if mode == "test":
            #print(test_dataset)
            model_testing(test_dataset, tag_to_idx, DEVICE, dropout,
                          hidden_layers, nheads, word_emb_model, word_emb,
                          ner_model, word_emb_dim)
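
A minimal, hypothetical command-line wrapper for Example #1's main; the flag names and defaults below are assumptions, not taken from the source:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # hypothetical defaults; adjust to the actual experiment setup
    parser.add_argument("--mode", choices=["train", "test", "all"], default="train")
    parser.add_argument("--loss_function", default="nll")
    parser.add_argument("--hidden_layers", type=int, default=2)
    parser.add_argument("--nheads", type=int, default=8)
    parser.add_argument("--lr", type=float, default=1e-3)
    parser.add_argument("--dropout", type=float, default=0.1)
    parser.add_argument("--regularization", action="store_true")
    parser.add_argument("--weight_decay", type=float, default=1e-4)
    parser.add_argument("--n_epoch", type=int, default=10)
    parser.add_argument("--save_every", type=int, default=1)
    parser.add_argument("--word_emb_model", default="fasttext",
                        choices=["fasttext", "fasttext-indobert", "indobert"])
    parser.add_argument("--word_emb_dim", type=int, default=100)
    parser.add_argument("--ner_model", default=None)
    args = parser.parse_args()
    main(**vars(args))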
Example #2
import argparse
import torch
from data_loader import getData  # module name assumed, matching Example #5
parser = argparse.ArgumentParser()
parser.add_argument("--path", "-path", help="the path")
parser.add_argument("--trainFile", "-trn", help="the train file")
parser.add_argument("--valFile", "-val", help="the validation file")
parser.add_argument("--testFile", "-tst", help="the test file")
args = parser.parse_args()

assert args.trainFile, "--trainFile is required"
assert args.valFile, "--valFile is required"
assert args.testFile, "--testFile is required"

#german_vocab, english_vocab, train_data, valid_data, test_data = getData_newMethod()
print("===============================before loading")
spe_dec, train_data, valid_data, test_data = getData(args.path, args.trainFile,
                                                     args.valFile,
                                                     args.testFile)
print("train_data ", len(train_data.examples))
print("valid_data ", len(valid_data.examples))
print("test_data ", len(test_data.examples))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = "cpu"
batch_size = 32

# data = train_data[0:3]
# for example in data:
#     src = example.src
#     trg = example.trg
#     print(">>>> ", src)
#     print(spe_dec.decode(src))
#     print("     ", trg)
Example #3
from torch import nn
import torch.optim as opt
from data_loader import getData
from torch.utils.data import DataLoader
import torch
from model import Model

data = getData()

# wrap the dataset in a DataLoader to get shuffled mini-batches
train_data = DataLoader(data, batch_size=6, shuffle=True, num_workers=2)

model = Model()
lossFun = nn.MSELoss(reduction='mean')  # size_average=True is deprecated
optimizer = opt.Adagrad(model.parameters(), lr=0.01)

for epoch in range(2):
    for i_batch, sample_batched in enumerate(train_data):
        optimizer.zero_grad()
        inputs, labels = sample_batched  # avoid shadowing the built-in input
        print("****************** batch = ", i_batch,
              "*****************************")
        y_predic = model(inputs)
        loss = lossFun(y_predic, labels)
        print("loss = ", loss.item())
        loss.backward()
        optimizer.step()

# saves the full model object; torch.save(model.state_dict(), ...) is the more portable option
torch.save(model, 'model/model.pt')
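
To reuse the checkpoint later, the matching load call is (a sketch; the Model class must be importable when loading a full pickled model):

model = torch.load('model/model.pt')
model.eval()  # disable dropout/batch-norm updates for inference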
Example #4
def __getitem__(self, index):
    if self.mode == 'train':
        trainVideo, trainAudioMagnitude, trainAudioPhase = data_loader.getData(self.mode)
        return trainVideo, trainAudioMagnitude, trainAudioPhase
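
A PyTorch Dataset also needs __len__; a minimal hypothetical companion (the self.length attribute is an assumption):

def __len__(self):
    # number of indexable samples in the dataset
    return self.length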
Example #5
from utils import translate_sentence, load_checkpoint
import torch
from data_loader import getData  #, getData_newMethod
from train import train
from transfomer import Transformer

#german_vocab, english_vocab, train_data, valid_data, test_data = getData_newMethod()
print("===============================before loading")
spe_dec, train_data, valid_data, test_data = getData()
print("train_data ", len(train_data.examples))
print("valid_data ", len(valid_data.examples))
print("test_data ", len(test_data.examples))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = "cpu"
batch_size = 32

# data = train_data[0:3]
# for example in data:
#     src = example.src
#     trg = example.trg
#     print(">>>> ", src)
#     print(spe_dec.decode(src))
#     print("     ", trg)
#     print(spe_dec.decode(trg))

src_vocab_size = len(spe_dec)
trg_vocab_size = len(spe_dec)
print("src vocabulary size: ", src_vocab_size)
print("trg vocabulary size: ", trg_vocab_size)
embedding_size = 256
src_pad_idx = spe_dec.pad_id()
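
A plausible continuation (a sketch only; the Transformer constructor in the local transfomer module is not shown, so the argument list below is an assumption):

# hypothetical constructor arguments; match them to the actual Transformer signature
model = Transformer(
    embedding_size,
    src_vocab_size,
    trg_vocab_size,
    src_pad_idx,
    device=device,
).to(device)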