Beispiel #1
0
from data import load_file
from model import SentimentModel


# NOTE(review): `torch` and `nn` are used below but are not imported in the
# visible part of this file -- confirm the imports exist above this excerpt.

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# load_file comes from the project's `data` module. Judging by the names it
# returns the text/label fields, the three splits and one iterator per split
# -- TODO confirm against data.load_file.
# NOTE(review): the `train` name bound here is rebound by the `def train(...)`
# that follows later in this script, so the training split is no longer
# reachable under this name afterwards.
TEXT, LABEL, train, valid, test, train_iter, valid_iter, test_iter = load_file(filepath='data/',
                                                                               device=device)

# Model hyperparameters; the input dimension is the size of the text vocabulary.
INPUT_DIM = len(TEXT.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
# A single output unit -- presumably one raw logit per example, which is what
# BCEWithLogitsLoss below consumes (TODO confirm against SentimentModel).
OUTPUT_DIM = 1

model = SentimentModel(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)

# Plain SGD over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=3e-3)

# Binary cross-entropy that applies the sigmoid internally.
criterion = nn.BCEWithLogitsLoss()

# Move both the model and the loss module to the selected device.
model = model.to(device)
criterion = criterion.to(device)

def binary_accuracy(preds, y):
    """Return the fraction of predictions that match the binary targets.

    Args:
        preds: Tensor of raw logits (before the sigmoid).
        y: Tensor of 0/1 targets, comparable element-wise with ``preds``.

    Returns:
        A 0-dim float tensor holding the accuracy over *all* elements.
    """
    # Squash the logits to probabilities and round them to hard 0/1 labels.
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float()
    # Divide by the total element count rather than len(), which only counts
    # the first dimension and would over-report accuracy for inputs with more
    # than one column (and raise for 0-dim inputs).
    return correct.sum() / correct.numel()

def train(model, iterator, optimizer, criterion):
Beispiel #2
0
def _make_loader(path, word2ix, ix2word, shuffle):
    """Build a batch-size-16 DataLoader over the comment file at *path*."""
    dataset = CommentDataSet(path, word2ix, ix2word)
    return DataLoader(
        dataset,
        batch_size=16,
        shuffle=shuffle,
        num_workers=0,
        collate_fn=mycollate_fn,
    )


def main():
    """Train, validate, save and test the sentiment model.

    Reads all settings from ``config.get_args()``, builds the vocabulary from
    the training file, trains for ``epochs`` epochs with a validation pass
    after each one, writes the model's ``state_dict`` to ``model_save_path``
    and finally runs the test pass.
    """
    cfg = config.get_args()
    set_seed(cfg.seed)
    # The length statistics returned by build_word_dict are not needed here.
    word2ix, ix2word, _, _ = build_word_dict(cfg.train_path)

    train_loader = _make_loader(cfg.train_path, word2ix, ix2word, shuffle=True)
    validation_loader = _make_loader(cfg.validation_path, word2ix, ix2word,
                                     shuffle=True)
    test_loader = _make_loader(cfg.test_path, word2ix, ix2word, shuffle=False)

    weight = pre_weight(len(word2ix), cfg.pred_word2vec_path,
                        cfg.embedding_dim, word2ix, ix2word)

    model = SentimentModel(embedding_dim=cfg.embedding_dim,
                           hidden_dim=cfg.hidden_dim,
                           LSTM_layers=cfg.LSTM_layers,
                           drop_prob=cfg.drop_prob,
                           pre_weight=weight)
    # NOTE(review): the model is not moved to `device` here; presumably
    # train/validate/test do that themselves -- confirm.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    optimizer = optim.Adam(model.parameters(), lr=cfg.lr)
    # Learning-rate schedule: multiply the rate by 0.1 every 10 scheduler steps.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=10,
                                                gamma=0.1)
    criterion = nn.CrossEntropyLoss()

    # TensorBoard plotting produces many log files, so start from an empty
    # log directory when one is already present.
    if os.path.exists(cfg.tensorboard_path):
        shutil.rmtree(cfg.tensorboard_path)
        os.mkdir(cfg.tensorboard_path)

    for epoch in range(cfg.epochs):
        # Wrap the loader in a fresh progress bar every epoch. Rebinding
        # `train_loader` itself would wrap the previous tqdm wrapper again
        # and produce nested progress bars from the second epoch on.
        progress = tqdm(train_loader)
        # Read the rate actually in use from the optimizer; calling
        # scheduler.get_lr() here can report a wrong value on decay steps.
        current_lr = optimizer.param_groups[0]['lr']
        progress.set_description(
            '[%s%04d/%04d %s%f]' %
            ('Epoch:', epoch + 1, cfg.epochs, 'lr:', current_lr))
        train(epoch, cfg.epochs, progress, device, model, criterion,
              optimizer, scheduler, cfg.tensorboard_path)
        validate(epoch, validation_loader, device, model, criterion,
                 cfg.tensorboard_path)

    # Save the model. Create the directory the checkpoint lives in (if any);
    # exist_ok avoids a FileExistsError when the directory is already there
    # but the checkpoint file is not.
    save_dir = os.path.dirname(cfg.model_save_path)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    torch.save(model.state_dict(), cfg.model_save_path)

    # test() returns the confusion meter for the held-out set.
    confuse_meter = test(test_loader, device, model, criterion)
Beispiel #3
0
    # Fit the preprocessor on the dataset (vocabulary capped by --max_vocab),
    # transform the data with it and persist the fitted preprocessor.
    preprocessor = Preprocessor(max_vocab=args.max_vocab)
    data = preprocessor.fit_transform(dataset=data)
    preprocessor.save(args.prepro_save_path)

    # validation split
    data.split_data(validation_count=args.validation_count)
    train_ds, val_ds = data.to_dataset()

    # to dataLoaders (only the training data is shuffled)
    train_set = DataLoader(train_ds, batch_size=args.batch_size, shuffle=True)
    val_set = DataLoader(val_ds, batch_size=args.batch_size, shuffle=False)

    print('Initializing model...')
    # The "+ 3" presumably reserves ids for special tokens on top of the
    # learned vocabulary -- TODO confirm against Preprocessor.
    mod = SentimentModel(
        len(preprocessor.vocab2enc) + 3, args.embedding_dim, args.hidden_dim)
    opt = Adam(mod.parameters(), lr=args.lr)

    print('Training...')
    # BCELoss expects probabilities in [0, 1], so the model presumably ends
    # with a sigmoid -- TODO confirm against SentimentModel.
    fit(training=train_set,
        model=mod,
        validation=val_set,
        optimizer=opt,
        loss=torch.nn.BCELoss(),
        epochs=args.epochs)

    # Saving model
    # NOTE(review): this saves the whole module object rather than its
    # state_dict, so loading it requires the same class definition.
    print('Saving model...')
    torch.save(mod, args.model_save_path)

    print('Done!')
Beispiel #4
0

# Instantiate the model w/ hyperparams
# NOTE(review): `vocab_to_int` and `batch_size` (used by the loop below) are
# defined outside this excerpt.
vocab_size = len(vocab_to_int)+1 # +1 for the 0 padding
output_size = 1
embedding_dim = 400
hidden_dim = 256
n_layers = 2

model = SentimentModel(vocab_size, output_size, embedding_dim, hidden_dim, n_layers)
# print(model)

# loss and optimization functions
# BCELoss expects probabilities in [0, 1], so the model presumably applies a
# sigmoid to its output -- TODO confirm against SentimentModel.
lr=0.01
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# training params

epochs = 2
# `counter` and `print_every` are presumably used for periodic logging and
# `clip` as the gradient-clipping threshold in the batch loop -- TODO confirm;
# their use is not visible in this excerpt.
counter = 0
print_every = 100
clip = 5

# Put the model into training mode before the epoch loop.
model.train()
# train for some number of epochs
for e in range(epochs):
    # initialize hidden state
    h = model.init_hidden(batch_size)

    # batch loop