def load(args, checkpoint_dir):
    state_dict = torch.load(os.path.join(checkpoint_dir, 'checkpoint.pth'))
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        if k.startswith('module.'):
            namekey = k[7:]  # strip the `module.` prefix added by DataParallel
        else:
            namekey = k
        new_state_dict[namekey] = v

    if args.model_type == 'bert':
        config = BertConfig.from_json_file(os.path.join(checkpoint_dir, 'config.bin'))
        model = BertForSequenceClassification(config)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'cnn':
        model = CNNModel(n_vocab=args.vocab_size, embed_size=args.embed_size, num_classes=args.num_labels,
                         num_filters=args.num_filters, filter_sizes=args.filter_sizes, device=args.device)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'lstm':
        model = LSTMModel(n_vocab=args.vocab_size, embed_size=args.embed_size, num_classes=args.num_labels,
                          hidden_size=args.hidden_size, device=args.device)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'char-cnn':
        model = CharCNN(num_features=args.num_features, num_classes=args.num_labels)
        model.load_state_dict(new_state_dict)
    else:
        raise ValueError('unknown model type: {}'.format(args.model_type))

    return model.to(args.device)
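
# A minimal usage sketch for load() (not part of the original snippet); the
# checkpoint directory name is only illustrative, following the naming scheme
# used further below:
#
#   checkpoint_dir = os.path.join(args.output_dir, 'normal_imdb_bert_checkpoint-3')
#   model = load(args, checkpoint_dir)
#   model.eval()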
Example #2
def main():
    # parse arguments
    args = parser.parse_args()

    # load training data
    print("\nLoading training data...")
    train_dataset = AGNEWs(label_data_path=args.train_path, alphabet_path=args.alphabet_path)
    print("Transferring training data into iterator...")
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size, num_workers=args.num_workers, drop_last=True, shuffle=True)
    # feature length
    args.num_features = len(train_dataset.alphabet)

    # load developing data
    print("\nLoading developing data...")
    dev_dataset = AGNEWs(label_data_path=args.val_path, alphabet_path=args.alphabet_path)
    print("Transferring developing data into iterator...")
    dev_loader = DataLoader(dev_dataset, batch_size=args.batch_size, num_workers=args.num_workers, drop_last=True)

    class_weight, num_class_train = train_dataset.get_class_weight()
    _, num_class_dev = dev_dataset.get_class_weight()
    
    # when you have an unbalanced training set
    if args.class_weight is not None:
        args.class_weight = torch.FloatTensor(class_weight).sqrt_()
        if args.cuda:
            args.class_weight = args.class_weight.cuda()

    print('\nNumber of training samples: ' + str(len(train_dataset)))
    for i, c in enumerate(num_class_train):
        print("\tLabel {:d}:".format(i).ljust(15) + "{:d}".format(c).rjust(8))
    print('\nNumber of developing samples: ' + str(len(dev_dataset)))
    for i, c in enumerate(num_class_dev):
        print("\tLabel {:d}:".format(i).ljust(15) + "{:d}".format(c).rjust(8))


    # make save folder
    try:
        os.makedirs(args.save_folder)
    except OSError as e:
        if e.errno == errno.EEXIST:
            print('Directory already exists.')
        else:
            raise
    # args.save_folder = os.path.join(args.save_folder, datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))

    # configuration
    print("\nConfiguration:")
    for attr, value in sorted(args.__dict__.items()):
        print("\t{}:".format(attr.capitalize().replace('_', ' ')).ljust(25)+"{}".format(value))

    # log result
    if args.log_result:
        with open(os.path.join(args.save_folder,'result.csv'), 'w') as r:
            r.write('{:s},{:s},{:s},{:s},{:s}'.format('epoch', 'batch', 'loss', 'acc', 'lr'))
    # model
    model = CharCNN(args)
    print(model)
            
    # train 
    train(train_loader, dev_loader, model, args)
                    # head of this evaluation function truncated in the source; the
                    # target_word line follows the idxToWord pattern used further below
                    target_word += labelCorpus.idxToWord(
                        target[row][i].data[0]) + "_"
                print("Input:{}, Predicted:{} , Target:{}".format(
                    input_word_list[row], predicted_word, target_word))
    return totalLoss / numLines


# number of input char types
char_vocab = len(string.printable)

# number of output classes = vocab size
numOutputClass = len(labelCorpus.dictionary)
print("Number of Classes:" + str(numOutputClass))

# Initialize models and start training

encoder = CharCNN(char_vocab, args.hidden_size)

decoder = DecoderRNN(args.hidden_size, numOutputClass)

encoder_optimizer = torch.optim.Adam(encoder.parameters(),
                                     lr=args.learning_rate)
decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                     lr=args.learning_rate)
criterion = nn.CrossEntropyLoss()

if args.cuda:
    criterion.cuda()
    encoder.cuda()
    decoder.cuda()

start = time.time()
Example #4
# print(dev_set.data)    # DEBUG
# print("numberized test set:")
# print(test_set.data)   # DEBUG
print('#token: {}'.format(len(token_vocab)))
print('#char: {}'.format(len(char_vocab)))
print('#label: {}'.format(len(label_vocab)))

# Embedding file
word_embed = load_embedding(args.word_embed,
                            dimension=args.word_embed_dim,
                            vocab=token_vocab)
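# Parse the filter spec string (e.g. '2,25;3,25', an illustrative value) into
# pairs of ints describing the character-CNN filters.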
charcnn_filters = [[int(f.split(',')[0]),
                    int(f.split(',')[1])]
                   for f in args.charcnn_filters.split(';')]
char_embed = CharCNN(len(char_vocab),
                     args.char_embed_dim,
                     filters=charcnn_filters)
char_hw = Highway(char_embed.output_size,
                  layer_num=args.charhw_layer,
                  activation=args.charhw_func)
feat_dim = word_embed.embedding_dim + char_embed.output_size
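# Word embeddings and character-CNN features are concatenated per token, giving
# the combined feature dimension fed to the bidirectional LSTM below.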
lstm = LSTM(feat_dim,
            args.lstm_hidden_size,
            batch_first=True,
            bidirectional=True,
            forget_bias=args.lstm_forget_bias)
crf = CRF(label_size=len(label_vocab) + 2)
linear = Linears(in_features=lstm.output_size,
                 out_features=len(label_vocab),
                 hiddens=[lstm.output_size // 2])
lstm_crf = LstmCrf(token_vocab,
    parser.add_argument('--lower', default=True)

    args = parser.parse_args()
    dataloader = load_datasets(args)
    best_error = 1000
    early_stop = 0

    if not os.path.exists(args.savedir):
        os.makedirs(args.savedir)

    model_name = os.path.join(args.savedir, 'best.pt')
    train_begin = time.time()
    print('train begin', '-' * 50)
    print()
    print()
    model = CharCNN(70, args.dropout)
    criterion = nn.CrossEntropyLoss()
    if args.optimizer == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
        scheduler = StepLR(optimizer, step_size=3, gamma=0.5)
    elif args.optimizer == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=0.0001)
    else:
        print('Unsupported optimizer: {}'.format(args.optimizer))
        exit(1)

    writer = SummaryWriter('log')

    for epoch in range(args.epochs):
        epoch_begin = time.time()
def main(_):
    print("Loading data...")
    x, y, sequence_length = data_loader.read_data(FLAGS.pos_data,
                                                  FLAGS.neg_data,
                                                  FLAGS.max_word_length,
                                                  FLAGS.max_seq_length)
    print("Data Size:", len(y))
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(y)))
    x_shuffled = x[shuffle_indices]
    y_shuffled = y[shuffle_indices]
    seq_shuffled = sequence_length[shuffle_indices]
    dev_sample_index = -1 * int(FLAGS.dev_percentage * float(len(y)))
    x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[
        dev_sample_index:]
    y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[
        dev_sample_index:]
    seq_train, seq_dev = seq_shuffled[:dev_sample_index], seq_shuffled[
        dev_sample_index:]
    del x, y, sequence_length, x_shuffled, y_shuffled, seq_shuffled
    print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        session_conf.gpu_options.allow_growth = True
        #session_conf.gpu_options.per_process_gpu_memory_fraction = 0.45
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = CharCNN(char_vocab_size=FLAGS.char_vocab_size,
                          char_embed_size=FLAGS.char_embed_size,
                          batch_size=FLAGS.batch_size,
                          max_word_length=FLAGS.max_word_length,
                          max_seq_length=FLAGS.max_seq_length,
                          filters=eval(FLAGS.filters),
                          filter_sizes=eval(FLAGS.filter_sizes),
                          num_classes=FLAGS.num_classes,
                          rnn_size=FLAGS.rnn_size,
                          attention_size=FLAGS.attention_size)
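            # Note: FLAGS.filters and FLAGS.filter_sizes are run through eval(),
            # so the flags are expected to hold Python list literals such as
            # "[100, 100]" and "[3, 4]" (illustrative values, not from the source).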

            save_path = os.path.join(FLAGS.save_path)
            if not os.path.isdir(save_path):
                os.makedirs(save_path)
            saver = tf.train.Saver(tf.trainable_variables())
            for v in tf.trainable_variables():
                print("Save:", v.name)

            sess.run(tf.global_variables_initializer())

            check_point_dir = os.path.join(FLAGS.save_path)
            ckpt = tf.train.get_checkpoint_state(check_point_dir)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                print("Reading model parameters from %s" %
                      ckpt.model_checkpoint_path)
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                print("Created model with fresh parameters.")

            batches = data_loader.batch_iter(
                list(zip(x_train, y_train, seq_train)), FLAGS.batch_size,
                FLAGS.num_epochs)

            global_max_acc = 0
            for batch in batches:
                x_batch, y_batch, seq_batch = zip(*batch)
                train_step(x_batch, y_batch, seq_batch, sess, cnn)
                current_step = tf.train.global_step(sess, cnn.global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    max_dev_acc = 0
                    print("\nEvaluation:")
                    batches_dev = data_loader.batch_iter(
                        list(zip(x_dev, y_dev, seq_dev)), FLAGS.batch_size, 1)
                    for batch_dev in batches_dev:
                        x_batch_dev, y_batch_dev, seq_batch_dev = zip(
                            *batch_dev)
                        max_dev_acc = dev_step(x_batch_dev, y_batch_dev,
                                               seq_batch_dev, sess, cnn,
                                               max_dev_acc)
                    print("During this evaluation phase, the max accuracy is:",
                          max_dev_acc)
                    if max_dev_acc > global_max_acc:
                        global_max_acc = max_dev_acc
                    print("\n Until now, the max accuracy is:", global_max_acc)
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess,
                                      os.path.join(save_path, "model"),
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
Example #7
    print("\nLoading testing data...")
    test_dataset = AGNEWs(label_data_path=args.test_path,
                          alphabet_path=args.alphabet_path)
    print("Transferring testing data to iterator...")
    test_loader = DataLoader(test_dataset,
                             batch_size=args.batch_size,
                             num_workers=args.num_workers,
                             drop_last=True)

    _, num_class_test = test_dataset.get_class_weight()
    print('\nNumber of testing samples: ' + str(len(test_dataset)))
    for i, c in enumerate(num_class_test):
        print("\tLabel {:d}:".format(i).ljust(15) + "{:d}".format(c).rjust(8))

    args.num_features = len(test_dataset.alphabet)
    model = CharCNN(args)
    print("=> loading weights from '{}'".format(args.model_path))
    assert os.path.isfile(
        args.model_path), "=> no checkpoint found at '{}'".format(
            args.model_path)
    checkpoint = torch.load(args.model_path)
    model.load_state_dict(checkpoint['state_dict'])

    # using GPU
    if args.cuda:
        model = torch.nn.DataParallel(model).cuda()

    model.eval()
    corrects, avg_loss, accumulated_loss, size = 0, 0, 0, 0
    predicates_all, target_all = [], []
    print('\nTesting...')
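
    # A minimal sketch (an assumption, not part of the original snippet) of how the
    # truncated testing loop above could continue: accumulate the summed cross-entropy
    # loss and correct predictions over test_loader, then report accuracy. Assumes
    # `import torch.nn.functional as F` and (data, target) batches from the loader.
    with torch.no_grad():
        for data, target in test_loader:
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            logit = model(data)
            accumulated_loss += F.cross_entropy(logit, target, reduction='sum').item()
            corrects += (torch.max(logit, 1)[1] == target).sum().item()
            size += target.size(0)
            predicates_all.extend(torch.max(logit, 1)[1].tolist())
            target_all.extend(target.tolist())
    avg_loss = accumulated_loss / size
    print('Evaluation - loss: {:.6f}  acc: {:.2f}% ({}/{})'.format(
        avg_loss, 100.0 * corrects / size, corrects, size))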
Example #8
logger.info('Building the model')
word_embed = Embedding(Config({
    'num_embeddings': len(token_vocab),
    'embedding_dim': args.word_embed_dim,
    'padding': C.EMBED_START_IDX,
    'padding_idx': 0,
    'sparse': True,
    'trainable': True,
    'file': embed_file,
    'stats': args.embed_skip_first,
    'vocab': token_vocab,
    'ignore_case': word_ignore_case
}))
char_cnn = CharCNN(Config({
    'vocab_size': len(char_vocab),
    'padding': C.CHAR_EMBED_START_IDX,
    'dimension': args.char_embed_dim,
    'filters': charcnn_filters
}))
char_highway = Highway(Config({
    'num_layers': 2,
    'size': char_cnn.output_size,
    'activation': 'selu'
}))
lstm = LSTM(Config({
    'input_size': word_embed.output_size + char_cnn.output_size,
    'hidden_size': args.lstm_hidden_size,
    'forget_bias': 1.0,
    'batch_first': True,
    'bidirectional': True
}))
crf = CRF(Config({
Example #9
print("Train data sample number: {:d}".format(len(y_train)))

# Training
# ==================================================

with tf.Graph().as_default():
    ## define training computation graph
    learning_rate = 0.001
    m, n = x_train.shape
    print('x_train\'s shape is', x_train.shape)
    print(x_train[0])
    session_conf = tf.ConfigProto(
        allow_soft_placement=FLAGS.allow_soft_placement,
        log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    cnn = CharCNN()

    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                           momentum=0.8)
    train_op = optimizer.minimize(cnn.loss)
    init = tf.global_variables_initializer()
    n_batches = int(np.ceil(m / FLAGS.batch_size))

    # create a Saver node after all variable nodes are created
    saver = tf.train.Saver()

    # Output directory for models and summaries
    now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
    checkpoint_dir = os.path.abspath(os.path.join(os.path.curdir, "cv"))
    print("Writing check point to {}\n".format(checkpoint_dir))
Example #10
word_embed = Embedding(
    Config({
        'num_embeddings': len(token_vocab),
        'embedding_dim': train_args['word_embed_dim'],
        'padding': C.EMBED_START_IDX,
        'padding_idx': 0,
        'sparse': True,
        'trainable': True,
        'stats': train_args['embed_skip_first'],
        'vocab': token_vocab,
        'ignore_case': train_args['word_ignore_case']
    }))
char_cnn = CharCNN(
    Config({
        'vocab_size': len(char_vocab),
        'padding': C.CHAR_EMBED_START_IDX,
        'dimension': train_args['char_embed_dim'],
        'filters': charcnn_filters
    }))
char_highway = Highway(
    Config({
        'num_layers': 2,
        'size': char_cnn.output_size,
        'activation': 'selu'
    }))
lstm = LSTM(
    Config({
        'input_size': word_embed.output_size + char_cnn.output_size,
        'hidden_size': train_args['lstm_hidden_size'],
        'forget_bias': 1.0,
        'batch_first': True,
from model import CharCNN
import torch

model = CharCNN(70, 0.5)
model.load_state_dict(torch.load('save_model/best.pt'))

sent = "U.S. Brokers Cease-fire in Western Afghanistan KABUL (Reuters) - The United States has brokered a  cease-fire between a renegade Afghan militia leader and the  embattled governor of the western province of Herat,  Washington's envoy to Kabul said Tuesday."
sent_tensor = torch.zeros(1014).long()
alphabet = "abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{}\n"
for i, char in enumerate(sent):
    if i == 1014:
        break
    alphabet_index = alphabet.find(char)
    if alphabet_index != -1:
        sent_tensor[i] = alphabet_index

sent_tensor = sent_tensor.view(-1, sent_tensor.size(0))
out_feature = model(sent_tensor)
out_feature = out_feature.squeeze(0)
print('out_feature:', out_feature)
Example #12
seed_everything(config.seed)
validation_split = .2
shuffle_dataset = True

dataset = CustomDatasetFromCSV("train.csv")

# Creating data indices for training and validation splits:
dataset_size = len(dataset)
indices = list(range(dataset_size))
split = int(np.floor(validation_split * dataset_size))

np.random.seed(config.seed)
np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]

# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
valid_sampler = SubsetRandomSampler(val_indices)

train_loader = torch.utils.data.DataLoader(dataset,
                                           sampler=train_sampler,
                                           batch_size=128,
                                           num_workers=4)
validation_loader = torch.utils.data.DataLoader(dataset,
                                                sampler=valid_sampler,
                                                num_workers=4)

model = CharCNN(train_ds=train_loader, val_ds=validation_loader)
trainer = Trainer(gpus=1, fast_dev_run=True, max_epochs=1)
trainer.fit(model)
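# Note: with fast_dev_run=True the Lightning Trainer only runs a single batch of
# training and validation as a smoke test, so max_epochs=1 has no further effect here.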
Example #13
    # truncated in the source; contents_train / labels_train assumed by analogy with the val/test lines below
    dataTrain = Data(list(zip(contents_train, labels_train)), alphabet,
                     input_size)
    train_data, train_labels = dataTrain.convert_data()

    dataVal = Data(list(zip(contents_val, labels_val)), alphabet, input_size)
    val_data, val_labels = dataVal.convert_data()

    dataTest = Data(list(zip(contents_test, labels_test)), alphabet,
                    input_size)
    test_data, test_labels = dataTest.convert_data()

    # Initialize the model
    model = CharCNN(input_sz=config["data"]["input_size"],
                    alphabet_sz=config["data"]["alphabet_size"],
                    emb_sz=config["char_cnn_zhang"]["embedding_size"],
                    conv_layers=config["char_cnn_zhang"]["conv_layers"],
                    fc_layers=[],
                    threshold=config["char_cnn_zhang"]["threshold"],
                    dropout_p=config["char_cnn_zhang"]["dropout_p"],
                    optimizer=config["char_cnn_zhang"]["optimizer"],
                    loss=config["char_cnn_zhang"]["loss"])

    # Train
    model.train(train_inputs=train_data,
                train_labels=train_labels,
                val_inputs=val_data,
                val_labels=val_labels,
                epochs=config["training"]["epochs"],
                bs=config["training"]["batch_size"])

    # Evaluate
    results = model.test(test_data, test_labels, bs=128)
Example #14
# TODO: Create a proper cross-validation procedure
x_train, x_dev = x_shuffled[:-n_dev_samples], x_shuffled[-n_dev_samples:]
y_train, y_dev = y_shuffled[:-n_dev_samples], y_shuffled[-n_dev_samples:]
print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))


# Training
# ==================================================

with tf.Graph().as_default():
    session_conf = tf.ConfigProto(
      allow_soft_placement=FLAGS.allow_soft_placement,
      log_device_placement=FLAGS.log_device_placement)
    sess = tf.Session(config=session_conf)
    with sess.as_default():
        cnn = CharCNN(l2_reg_lambda=FLAGS.l2_reg_lambda)

        # Define Training procedure
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.histogram_summary("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.scalar_summary("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
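        # Note: tf.histogram_summary / tf.scalar_summary are the pre-1.0 TensorFlow
        # names; on TensorFlow 1.x these calls are tf.summary.histogram and
        # tf.summary.scalar, as used in the later example below.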
Example #15
def main(unused_argv):
    train_data_path = FLAGS.train_data_path
    val_data_path = FLAGS.validate_data_path

    # load train data
    train_data = DataSet(train_data_path)
    dev_data = DataSet(val_data_path)
    train_data.dataset_process()
    dev_data.dataset_process()

    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = CharCNN(
                l0=Config.l0,
                num_classes=Config.nums_classes,
                conv_layers=Config.model.conv_layers,
                fc_layers=Config.model.fc_layers,
                l2_reg_lambda=0
            )
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(Config.model.learning_rate)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
        grad_summaries_merged = tf.summary.merge(grad_summaries)

        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))

        loss_summary = tf.summary.scalar("loss", cnn.loss)
        acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

        # Train summaries
        train_summary_op = tf.summary.merge([loss_summary, acc_summary, grad_summaries_merged])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

        # Dev summaries
        dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
        dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

        # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
        checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables())

        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        def train_step(x_batch, y_batch):
            feed_dict = {
                cnn.input_x: x_batch,
                cnn.input_y: y_batch,
                cnn.dropout_keep_prob: Config.model.dropout_keep_prob
            }
            _, step, summaries, loss, accuracy = sess.run(
                [train_op, global_step, train_summary_op, cnn.loss, cnn.accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            train_summary_writer.add_summary(summaries, step)

        def dev_step(x_batch, y_batch, writer=None):
            """
            Evaluates model on a dev set
            """
            feed_dict = {
              cnn.input_x: x_batch,
              cnn.input_y: y_batch,
              cnn.dropout_keep_prob: 1.0
            }
            step, summaries, loss, accuracy = sess.run(
                [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                feed_dict)
            time_str = datetime.datetime.now().isoformat()
            print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
            if writer:
                writer.add_summary(summaries, step)

        print "初始化完毕,开始训练"
        for i in range(Config.training.epoches):
            batch_train = train_data.next_batch()
            # train the model
            train_step(batch_train[0], batch_train[1])
            current_step = tf.train.global_step(sess, global_step)
            # train_step.run(feed_dict={x: batch_train[0], y_actual: batch_train[1], keep_prob: 0.5})
            # record the results
            if current_step % Config.training.evaluate_every == 0:
                print("\nEvaluation:")
                dev_step(dev_data.doc_image, dev_data.label_image, writer=dev_summary_writer)
                print("")
            if current_step % Config.training.checkpoint_every == 0:
                path = saver.save(sess, checkpoint_prefix, global_step=current_step)
                print("Saved model checkpoint to {}\n".format(path))
Example #16
token_vocab = state['vocab']['token']
label_vocab = state['vocab']['label']
char_vocab = state['vocab']['char']
train_args = state['args']
charcnn_filters = [[int(f.split(',')[0]),
                    int(f.split(',')[1])]
                   for f in train_args['charcnn_filters'].split(';')]

# Resume model
logger.info('Resuming the model')
word_embed = torch.nn.Embedding(train_args['word_embed_size'],
                                train_args['word_embed_dim'],
                                sparse=True,
                                padding_idx=C.PAD_INDEX)
char_embed = CharCNN(len(char_vocab),
                     train_args['char_embed_dim'],
                     filters=charcnn_filters)
char_hw = Highway(char_embed.output_size,
                  layer_num=train_args['charhw_layer'],
                  activation=train_args['charhw_func'])
feat_dim = word_embed.embedding_dim + char_embed.output_size
lstm = LSTM(feat_dim,
            train_args['lstm_hidden_size'],
            batch_first=True,
            bidirectional=True,
            forget_bias=train_args['lstm_forget_bias'])
crf = CRF(label_size=len(label_vocab) + 2)
linear = Linear(in_features=lstm.output_size, out_features=len(label_vocab))
lstm_crf = LstmCrf(token_vocab,
                   label_vocab,
                   char_vocab,
        # Add predicted character to string and use as next input
        predicted_word = labelCorpus.idxToWord(top_i)
        target_word = labelCorpus.idxToWord(target[i].data[0])
        print("Input:{}, Predicted:{} , Target:{}".format(
            input_word_list[i], predicted_word, target_word))


# number of input char types
char_vocab = len(string.printable)

# number of output classes = vocab size
numOutputClass = len(labelCorpus.dictionary)

# Initialize models and start training

decoder = CharCNN(char_vocab, args.hidden_size, numOutputClass)
decoder_optimizer = torch.optim.Adam(decoder.parameters(),
                                     lr=args.learning_rate)
criterion = nn.CrossEntropyLoss()

if args.cuda:
    decoder.cuda()

start = time.time()
all_losses = []
loss_avg = 0

try:
    print("Training for %d epochs..." % args.n_epochs)
    for epoch in tqdm(range(1, args.n_epochs + 1)):
        loss = train(*random_training_set(args.batch_size, linesInTrain))
Example #18
    params = {'dim': (input_size,),
              'batch_size': config["training"]["batch_size"],
              'n_classes': config["data"]["n_classes"],
              'shuffle': True}

    # Datasets
    with open(config["data"]["save_ratings"], 'rb') as fp:
        labels = pickle.load(fp)

    # Generators
    training_generator = DataGenerator(train_indices, labels, config["data"]["save_reviews"], **params)
    validation_generator = DataGenerator(valid_indices, labels, config["data"]["save_reviews"], **params)

    # Define model
    model = CharCNN(input_size=input_size,
                    alphabet_size=alphabet_size,
                    embedding_size=config["char_cnn"]["embedding_size"],
                    conv_layers=config["char_cnn"]["conv_layers"],
                    fully_connected_layers=config["char_cnn"]["fully_connected_layers"],
                    n_classes=config["data"]["n_classes"],
                    threshold=config["char_cnn"]["threshold"],
                    dropout_p=config["char_cnn"]["dropout_p"],
                    optimizer=config["char_cnn"]["optimizer"],
                    loss=config["char_cnn"]["loss"])
    # Train model
    model.train(training_gen=training_generator,
                validation_gen=validation_generator,
                epochs=config["training"]["epochs"],
                batch_size=config["training"]["batch_size"],
                log_freq=config["training"]["log_freq"])
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument('--data_dir', default=None, type=str, required=True, help="The input data dir.")
    parser.add_argument('--model_type', default=None, type=str, required=True,
                        help="Model type selected in [bert, xlnet, xlm, cnn, lstm]")
    parser.add_argument('--model_name_or_path', default='bert-base-uncased', type=str,
                        help="Shortcut name is selected in [bert-base-uncased, ]")
    parser.add_argument('--task_name', default=None, type=str, required=True,
                        help="The name of task is selected in [imdb, amazon]")
    parser.add_argument('--output_dir', default='../out', type=str,
                        help="The output directory where the model predictions and checkpoints will be written.")
    # other parameters
    parser.add_argument("--cache_dir", default='../cache', type=str, help="Store the cache files.")
    parser.add_argument("--max_seq_length", default=256, type=int,
                        help="The maximum total input sequence length after tokenization.")
    parser.add_argument("--batch_size", default=8, type=int, help="Batch size per GPU/CPU for training.")
    parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm. Avoiding over-fitting.")
    parser.add_argument("--num_train_epochs", default=20, type=int, help="Total number of training epochs to perform.")
    parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.")
    parser.add_argument("--seed", default=42, type=int, help="Random seed for initializaiton.")
    parser.add_argument("--train", action='store_true', help="Whether to run training.")
    parser.add_argument("--eval", action='store_true', help="Whether to run eval on dev set.")
    parser.add_argument("--ckpt", default=-1, type=int, help="Which ckpt to load.")
    parser.add_argument("--from_scratch", action='store_true', help="Whether to train from scratch.")
    parser.add_argument("--train_type", default='normal', type=str, help="Train type is selected in [normal, rs].")

    args = parser.parse_args()

    if not os.path.exists(args.data_dir):
        raise ValueError("input data dir is not exist.")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    args.device = device

    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=logging.INFO)
    logger.warning("model type: %s, task name: %s, device: %s", args.model_type, args.task_name, device)

    # set seed
    set_seed(args)
    # Prepare task
    if args.task_name not in processors:
        raise ValueError("Task not found: %s" % args.task_name)

    task_class = processors[args.task_name]()
    label_list = task_class.get_labels()
    num_labels = len(label_list)
    args.num_labels = num_labels

    # load model.
    # MODEL_CLASSES = {
    #     'bert': (BertConfig, BertForSequenceClassification, BertTokenizer),
    #     # 'xlnet': (XLNetConfig, XLNetForSequenceClassification, XLNetTokenizer),
    #     # 'xlm': (XLMConfig, XLMForSequenceClassification, XLMTokenizer),
    # }
    model = None
    tokenizer = BertTokenizer.from_pretrained(args.model_name_or_path, do_lower_case=True)
    args.vocab_size = tokenizer.vocab_size
    if args.model_type == 'bert':
        config = BertConfig.from_pretrained(args.model_name_or_path, num_labels=num_labels,
                                            finetuning_task=args.task_name)
        model = BertForSequenceClassification.from_pretrained(args.model_name_or_path, config=config)
    elif args.model_type == 'cnn':
        args.embed_size = 300
        args.num_filters = 100
        args.filter_sizes = (3,)
        model = CNNModel(n_vocab=args.vocab_size, embed_size=args.embed_size, num_classes=num_labels,
                         num_filters=args.num_filters, filter_sizes=args.filter_sizes, device=args.device)
    elif args.model_type == 'lstm':
        args.embed_size = 300
        args.hidden_size = 100
        model = LSTMModel(n_vocab=args.vocab_size, embed_size=args.embed_size, num_classes=num_labels,
                          hidden_size=args.hidden_size, device=args.device)
    elif args.model_type == 'char-cnn':
        args.alphabets = 'abcdefghijklmnopqrstuvwxyz0123456789-,;.!?:\'"\\/|_@#$%^&*~`+-=<>()[]{}\n'
        args.num_features = len(args.alphabets)
        args.l0 = 1014
        model = CharCNN(num_features=args.num_features, num_classes=args.num_labels)
    else:
        raise ValueError('unknown model type: {}'.format(args.model_type))

    model.to(device)
    logger.info("Training/evaluation parameters %s", args)

    # Create output directory if needed
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    # Create cache directory if needed
    if not os.path.exists(args.cache_dir):
        os.makedirs(args.cache_dir)

    train_dataset = None
    if args.model_type != 'char-cnn':
        if args.train:
            train_dataset = load_and_cache_normal_example(args, tokenizer, evaluate=False)
        eval_dataset = load_and_cache_normal_example(args, tokenizer, evaluate=True)
    else:
        if args.train:
            train_dataset = load_and_cache_normal_char_example(args, args.alphabets, evaluate=False)
        eval_dataset = load_and_cache_normal_char_example(args, args.alphabets, evaluate=True)
    # Training
    if args.train:
        if args.from_scratch:  # default False
            global_step, train_loss = normal_train(args, model, train_dataset, eval_dataset)
        else:
            if args.ckpt < 0:
                checkpoints = glob.glob(
                    args.output_dir + '/normal_{}_{}_checkpoint-*'.format(args.task_name, args.model_type))
                checkpoints.sort(key=lambda x: int(x.split('-')[-1]))
                checkpoint = checkpoints[-1]
                ckpt = int(checkpoint.split('-')[-1])
            else:
                checkpoint = os.path.join(args.output_dir, 'normal_{}_{}_checkpoint-{}'.format(args.task_name, args.model_type, args.ckpt))
                ckpt = args.ckpt
            model = load(args, checkpoint)
            print("Load model from {}".format(checkpoint))
            global_step, train_loss = normal_train(args, model, train_dataset, eval_dataset, ckpt + 1)
        logger.info(" global_step = %s, average loss = %s", global_step, train_loss)

        # logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        # model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
        # if args.model_type == 'bert':
        #     model_to_save.save_pretrained(args.output_dir)
        # else:
        #     torch.save({'state_dict': model_to_save.state_dict()}, os.path.join(args.output_dir, '{}_{}_normal_checkpoint.pth.tar'.format(args.task_name, args.model_type)))
        # tokenizer.save_pretrained(args.output_dir)
        # # Good practice: save your training arguments together with the trained model
        # torch.save(args, os.path.join(args.output_dir, '{}_{}_normal_training_args.bin'.format(args.task_name, args.model_type)))

    # The model can be saved in two ways: (1) model_to_save.save_pretrained(output_dir),
    # reloaded with BertForSequenceClassification.from_pretrained(output_dir); or
    # (2) torch.save({'state_dict': model.state_dict()}, output_file), reloaded by
    # rebuilding the model (from config.bin for BERT) and calling model.load_state_dict().

    # Evaluation
    if args.eval:
        if args.ckpt < 0:
            checkpoints = glob.glob(
                args.output_dir + '/{}_{}_{}_checkpoint-*'.format(args.train_type, args.task_name, args.model_type))
            checkpoints.sort(key=lambda x: int(x.split('-')[-1]))
            checkpoint = checkpoints[-1]
        else:
            checkpoint = os.path.join(args.output_dir, '{}_{}_{}_checkpoint-{}'.format(args.train_type, args.task_name, args.model_type, args.ckpt))
        model = load(args, checkpoint)
        print("Evaluation result, load model from {}".format(checkpoint))
        acc = evaluate(args, model, eval_dataset)
        print("acc={:.4f}".format(acc))
Example #20
def main():
    # parse arguments
    args = parser.parse_args(args=[])

    # load training data
    train_dataset = Novels(label_data_path=args.train_path,
                           alphabet_path=args.alphabet_path)

    # load developing data
    print("\nLoading developing data...")

    dev_dataset = Novels(label_data_path=args.val_path,
                         alphabet_path=args.alphabet_path)

    # Do the train/validation split (20% held out)
    num_train = len(train_dataset)
    indices = list(range(num_train))
    valid_size = 0.20
    random_seed = 1
    shuffle = True
    split = int(np.floor(valid_size * num_train))
    if shuffle:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              sampler=train_sampler,
                              num_workers=args.num_workers,
                              drop_last=True,
                              pin_memory=False)

    # feature length
    args.num_features = len(train_dataset.alphabet)

    dev_loader = DataLoader(dev_dataset,
                            batch_size=args.batch_size,
                            sampler=valid_sampler,
                            num_workers=args.num_workers,
                            pin_memory=False)

    class_weight, num_class_train = train_dataset.get_class_weight()
    _, num_class_dev = dev_dataset.get_class_weight()

    print("Transferring developing data into iterator...")

    # when you have an unbalanced training set
    if args.class_weight is not None:
        args.class_weight = torch.FloatTensor(class_weight).sqrt_()
        if args.cuda:
            args.class_weight = args.class_weight.cuda()

    print('\nNumber of training samples: ' + str(len(train_dataset)))

    # make save folder
    try:
        os.makedirs(args.save_folder)
    except OSError as e:
        if e.errno == errno.EEXIST:
            print('Directory already exists.')
        else:
            raise

    # configuration
    print("\nConfiguration:")
    for attr, value in sorted(args.__dict__.items()):
        print("\t{}:".format(attr.capitalize().replace('_', ' ')).ljust(25) +
              "{}".format(value))

    # log result
    if args.log_result:
        with open(os.path.join(args.save_folder, 'result.csv'), 'w') as r:
            r.write('{:s},{:s},{:s},{:s},{:s}'.format('epoch', 'batch', 'loss',
                                                      'acc', 'lr'))
    # model
    model = CharCNN(args)
    print(model)

    # train
    train(train_loader, dev_loader, model, args)
Example #21
from model import CharCNN
from torch.autograd import Variable
import torch
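
# Quick smoke test: the three constructor arguments are assumed to mirror
# CharCNN(char_vocab, hidden_size, num_output_classes) from the decoder example
# above; the random input below is only a forward-pass shape check.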

charcnn = CharCNN(5, 30, 60)
test_input = Variable(torch.randn(2, 5, 30))

print(charcnn(test_input))