Example #1
def evaluate(args, model, tokenizer, suffix=None):
    eval_dataset = load_dataset(args.eval_path, tokenizer, args.max_seq_length)

    args.eval_batch_size = args.eval_batch_size
    eval_sampler = SequentialSampler(eval_dataset)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler, batch_size=args.eval_batch_size,
                                 collate_fn=collate_batch)

    # Eval
    logger.info("***** Running evaluation *****")
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0

    predictions = []
    for batches in tqdm(eval_dataloader, desc="Evaluating"):
        model.eval()
        with torch.no_grad():
            loss, logits_list = forward_gloss_selection(args, model, batches)

        eval_loss += loss
        predictions.extend([torch.argmax(logits, dim=-1).item() for logits in logits_list])
        nb_eval_steps += 1

    eval_loss = eval_loss / nb_eval_steps
    write_predictions(args.output_dir, args.eval_path, predictions, suffix)

    return eval_loss.item()
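A minimal, self-contained sketch of the same evaluation pattern (sequential sampling, torch.no_grad(), loss averaging) on toy data; all names below are illustrative and not part of the example above.

import torch
from torch.utils.data import DataLoader, SequentialSampler, TensorDataset

# Toy data and model standing in for eval_dataset / model above.
features = torch.randn(32, 4)
targets = torch.randint(0, 2, (32,))
toy_dataset = TensorDataset(features, targets)
toy_model = torch.nn.Linear(4, 2)
loss_fn = torch.nn.CrossEntropyLoss()

loader = DataLoader(toy_dataset, sampler=SequentialSampler(toy_dataset), batch_size=8)

toy_model.eval()
total_loss, steps, predictions = 0.0, 0, []
with torch.no_grad():
    for x, y in loader:
        logits = toy_model(x)
        total_loss += loss_fn(logits, y).item()
        predictions.extend(torch.argmax(logits, dim=-1).tolist())
        steps += 1

print("eval loss:", total_loss / steps)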
Example #2
def main(_):
    dataset = load_dataset()
    train_dataset = DatasetPair(dataset.train.images, dataset.train.labels)
    test_dataset = DatasetPair(dataset.test.images, dataset.test.labels)

    log_dir = FLAGS.logdir

    hparams = get_hparams(FLAGS.hparams)
    # Data augmentation: apply random horizontal flip and random crop
    if hparams.augment_percent > 0:
        images, labels = augment_data(train_dataset.X, train_dataset.Y, 28, 28,
                                      1, hparams.augment_percent)
        train_dataset = DatasetPair(images, labels)

    model_class = models[FLAGS.model]
    model = model_class(hparams, image_size=28, num_channels=1, num_classes=10)
    fashion_classifier = FashionClassifier(model,
                                           train_dataset,
                                           test_dataset,
                                           batch_size=hparams.batch_size,
                                           log_dir=log_dir)
    if FLAGS.action == 'train':
        resume_training = FLAGS.resume_training
        create_embeddings = FLAGS.create_embeddings
        fashion_classifier.train_and_evaluate(
            num_epochs=hparams.num_epochs,
            resume_training=resume_training,
            print_cost=True,
            create_embeddings=create_embeddings)
    elif FLAGS.action == 'load':
        fashion_classifier.load_and_evaluate()
Example #3
def train(num_epochs, save_dir, dataset, restore):
    data = dataset_utils.load_dataset(dataset)

    split = 0.1
    train_split, validation_split, batch_size = dataset_utils.create_generator_and_validation(data, split)
    model, optimizer, loss_function = models.create_model()

    max_to_keep = 25
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
    manager = tf.train.CheckpointManager(checkpoint, directory='checkpoints', max_to_keep=max_to_keep)

    best_checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
    best_manager = tf.train.CheckpointManager(best_checkpoint, directory='checkpoints/best', max_to_keep=1)

    log_dir = f'logs/{datetime.now().strftime("%d-%b-%Y_%H.%M.%S")}'

    callbacks = [
        tf.keras.callbacks.EarlyStopping(patience=int(num_epochs * 0.1)),
        tf.keras.callbacks.TensorBoard(log_dir=log_dir, write_graph=False),
        CheckpointCallback(manager, best_manager)
    ]

    if restore:
        print('Restoring from latest checkpoint.')
        manager.restore_or_initialize()

    model.fit(x=train_split, epochs=num_epochs, batch_size=batch_size, validation_data=validation_split, callbacks=callbacks)

    print(f'TensorBoard logs can be found under \'{log_dir}\'.')

    best_manager.restore_or_initialize()
    print(f'Saving model as \'{save_dir}\'.')
    model.save(save_dir)
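The checkpoint handling above relies on tf.train.CheckpointManager; a minimal sketch of that pattern in isolation, assuming TF >= 2.2 (where restore_or_initialize is available) and using illustrative directory names:

import tensorflow as tf

# Toy model/optimizer standing in for models.create_model() above.
toy_model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
toy_optimizer = tf.keras.optimizers.Adam()

ckpt = tf.train.Checkpoint(optimizer=toy_optimizer, model=toy_model)
manager = tf.train.CheckpointManager(ckpt, directory='checkpoints_demo', max_to_keep=3)

# Restores the latest checkpoint if one exists, otherwise leaves the objects freshly initialized.
restored_path = manager.restore_or_initialize()
print('restored from:', restored_path)  # None on the first run

# ... training would happen here ...
print('saved to:', manager.save())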
Example #4
def main(_):
    dataset = load_dataset()
    X_train, Y_train = dataset.train.images, dataset.train.labels
    X_test, Y_test = dataset.test.images, dataset.test.labels

    if FLAGS.hparams_path:
        with open(FLAGS.hparams_path) as hparams_json:
            param_grid = json.load(hparams_json)
    else:
        param_grid = {
            "conv1_depth": [16, 32, 64],
            "conv2_depth": [64, 128],
            "dense_layer_units": [1024, 2048],
            "batch_size": [64, 128],
            "lambd": [0.5],
            "num_epochs": [5]
        }

    hparams_subset = hparam_product(param_grid)
    shuffle(hparams_subset)
    grid_size = min(len(hparams_subset), FLAGS.grid_size)
    hparams_subset = hparams_subset[:grid_size]

    for hparams in hparams_subset:
        print('***************************************')
        print('Training with: {0}\n\n'.format(hparam_string(hparams)))

        checkpoint_subdir = hparam_string(hparams)
        conv_depths = [hparams['conv1_depth'], hparams['conv2_depth']]
        dense_layer_units = hparams['dense_layer_units']
        batch_size = hparams['batch_size']
        lambd = hparams['lambd']
        num_epochs = hparams['num_epochs']
        keep_prob = hparams.get('keep_prob', 1.0)  # the default grid above has no 'keep_prob'; 1.0 disables dropout

        log_dir = os.path.join(FLAGS.logdir, checkpoint_subdir)
        fashion_classifier = FashionClassifier(X_train,
                                               Y_train,
                                               X_test,
                                               Y_test,
                                               image_size=28,
                                               num_channels=1,
                                               num_classes=10,
                                               log_dir=log_dir)

        fashion_classifier.model(padding='SAME',
                                 patch_size=5,
                                 conv_depths=conv_depths,
                                 dense_layer_units=dense_layer_units,
                                 learning_rate=0.001,
                                 batch_size=batch_size,
                                 keep_prob=keep_prob,
                                 lambd=lambd)

        fashion_classifier.train_and_evaluate(num_epochs=num_epochs,
                                              resume_training=False,
                                              print_cost=True)
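hparam_product and hparam_string are project-specific helpers; a minimal stand-in that expands a parameter grid into individual hyperparameter dicts might look like this sketch (not the original implementation):

from itertools import product
from random import shuffle


def hparam_product(param_grid):
    """Expand a dict of lists into a list of single-valued hyperparameter dicts."""
    keys = sorted(param_grid)
    return [dict(zip(keys, values))
            for values in product(*(param_grid[k] for k in keys))]


def hparam_string(hparams):
    """Stable string form, usable as a checkpoint sub-directory name."""
    return '_'.join('{0}-{1}'.format(k, hparams[k]) for k in sorted(hparams))


grid = {"batch_size": [64, 128], "lambd": [0.5]}
combos = hparam_product(grid)
shuffle(combos)
print(len(combos), hparam_string(combos[0]))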
Example #5
def main():
    args = config.config()

    if not args.train_data_path:
        logger.info("please input train dataset path")
        exit()
    # if not (args.dev_data_path or args.test_data_path):
    #     logger.info("please input dev or test dataset path")
    #     exit()

    all_ = dataset.load_dataset(args.train_data_path, args.dev_data_path, args.test_data_path,
                                args.txt_embedding_path, args.cpt_embedding_path,
                                args.train_batch_size, args.dev_batch_size, args.test_batch_size)
    (txt_TEXT, cpt_TEXT, txt_vocab_size, cpt_vocab_size, txt_word_embeddings, cpt_word_embeddings,
     train_iter, dev_iter, test_iter, label_size) = all_

    model = STCK_Atten(txt_vocab_size, cpt_vocab_size, args.embedding_dim, txt_word_embeddings,
                       cpt_word_embeddings, args.hidden_size, label_size)

    if torch.cuda.is_available():
        model = model.cuda()

    train_data, test_data = dataset.train_test_split(train_iter, 0.8)
    train_data, dev_data = dataset.train_dev_split(train_data, 0.8)
    loss_func = torch.nn.CrossEntropyLoss()

    if args.load_model:
        model.load_state_dict(torch.load(args.load_model))
        test_loss, acc, p, r, f1 = eval_model(model, test_data, loss_func)
        logger.info(
            'Test Loss:%.4f, Test Acc:%.4f, Test P:%.4f, Test R:%.4f, Test F1:%.4f',
            test_loss, acc, p, r, f1)
        return

    best_score = 0.0
    test_loss, test_acc, test_p, test_r, test_f1 = 0, 0, 0, 0, 0
    for epoch in range(args.epoch):
        train_loss, eval_loss, acc, p, r, f1 = train_model(
            model, train_data, dev_data, epoch, args.lr, loss_func)

        logger.info('Epoch:%d, Training Loss:%.4f', epoch, train_loss)
        logger.info(
            'Epoch:%d, Eval Loss:%.4f, Eval Acc:%.4f, Eval P:%.4f, Eval R:%.4f, Eval F1:%.4f',
            epoch, eval_loss, acc, p, r, f1)

        if f1 > best_score:
            best_score = f1
            torch.save(
                model.state_dict(),
                'results/%d_%s_%s.pt' % (epoch, 'Model', str(best_score)))
            test_loss, test_acc, test_p, test_r, test_f1 = eval_model(
                model, test_data, loss_func)
        logger.info(
            'Test Loss:%.4f, Test Acc:%.4f, Test P:%.4f, Test R:%.4f, Test F1:%.4f',
            test_loss, test_acc, test_p, test_r, test_f1)
Example #6
def train():

    model = MonoNet(cfg['model'])

    if cfg['std']['pretrain']:
        model.load_weights(cfg['std']['pretrain'])
        print('[info] pretrained weights loaded from: {}'.format(
            cfg['std']['pretrain']))

    train_ds = load_dataset(cfg['std']['train_file'], cfg)
    val_ds = load_dataset(cfg['std']['val_file'], cfg)

    callbacks = [
        keras.callbacks.TensorBoard(log_dir='./logs/{}'.format(
            cfg['std']['log_code']),
                                    update_freq='epoch',
                                    profile_batch='10,20'),
        keras.callbacks.ModelCheckpoint('./logs/{}/model/weights.ckpt'.format(
            cfg['std']['log_code']),
                                        save_weights_only=True,
                                        save_best_only=True)
    ]

    helpers.dump_config(cfg)
    tf_utils.print_summary(model, cfg)

    model.compile(keras.optimizers.Adam(learning_rate=cfg['model']['lr']),
                  keras.optimizers.Adam(learning_rate=cfg['model']['lr']),
                  eager=True)

    model.fit(train_ds,
              validation_data=val_ds,
              validation_steps=1,
              validation_freq=1,
              callbacks=callbacks,
              epochs=cfg['model']['epochs'],
              steps_per_epoch=cfg['std']['val_freq'],
              verbose=0)
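MonoNet appears to override compile (it is given two optimizers and an eager flag); with a stock Keras model, the equivalent TensorBoard/ModelCheckpoint setup looks roughly like this sketch (toy model, illustrative paths):

import numpy as np
from tensorflow import keras

toy_model = keras.Sequential([keras.layers.Dense(2, activation='softmax', input_shape=(4,))])
toy_model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3),
                  loss='sparse_categorical_crossentropy',
                  run_eagerly=True)  # stock-Keras counterpart of the custom eager flag

demo_callbacks = [
    keras.callbacks.TensorBoard(log_dir='./logs/demo', update_freq='epoch'),
    keras.callbacks.ModelCheckpoint('./logs/demo/model/weights.ckpt',
                                    save_weights_only=True,
                                    save_best_only=True),
]

x = np.random.rand(64, 4).astype('float32')
y = np.random.randint(0, 2, size=(64,))
toy_model.fit(x, y, validation_split=0.25, epochs=2, callbacks=demo_callbacks, verbose=0)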
Example #7
def inference():

    model = MonoNet(cfg['model'])
    model.load_weights(WEIGHTS)
    print('[info] model weights loaded.')

    dataset = load_dataset(DATASET, cfg, False)

    for inputs, label in dataset:

        calib = inputs['calib']

        pred_c, pred_attr, pred_clf = model(
            [inputs['img'], calib['calib'], calib['img_orig']])

        img = tf.squeeze(inputs['img'], 0)
        scan = tf.squeeze(inputs['scan'], 0)
        pred_c = tf.squeeze(pred_c, 0)
        pred_attr = tf.squeeze(pred_attr, 0)
        pred_clf = tf.squeeze(pred_clf, 0)

        scores = tf.expand_dims(tf.reduce_max(pred_clf, 1), -1)
        pred_clf = tf.expand_dims(tf.math.argmax(pred_clf, 1), -1)
        scores = tf.where(pred_clf != cfg['model']['n_classes'] - 1, scores,
                          tf.zeros_like(scores))

        pred_c, pred_attr, pred_clf, scores = tf_utils.objectness_mask(
            pred_c, pred_attr, pred_clf, scores, SCORE_THRESH)

        if pred_c.shape[0] == 0: continue

        if NMS:
            boxes = tf.squeeze(
                tf_utils.scenenet_to_aabb(tf.expand_dims(pred_c, 0),
                                          tf.expand_dims(pred_attr, 0)), 0)
            nms_inds = helpers.nms(boxes.numpy(), scores.numpy(),
                                   cfg['std']['max_labels'], NMS_THRESH)
            pred_c = tf.gather(pred_c, nms_inds)
            pred_attr = tf.gather(pred_attr, nms_inds)
            pred_clf = tf.gather(pred_clf, nms_inds)
            pred_boxes = tf.gather(boxes, nms_inds)
        else:
            pred_boxes = tf.squeeze(
                tf_utils.scenenet_to_aabb(tf.expand_dims(pred_c, 0),
                                          tf.expand_dims(pred_attr, 0)), 0)

        plot_kitti(scan.numpy(), img.numpy(), pred_c.numpy(),
                   pred_attr.numpy(), pred_clf.numpy(), label)
Example #8
def main():
    lines, charmap, inv_charmap = load_dataset(max_length=SEQ_LEN,
                                               max_n_examples=MAX_N_EXAMPLES,
                                               data_dir=DATA_DIR)

    # Dataset iterator
    def inf_train_gen():
        while True:
            np.random.shuffle(lines)
            for i in range(0, len(lines) - BATCH_SIZE + 1, BATCH_SIZE):
                yield np.array([[charmap[c] for c in l]
                                for l in lines[i:i + BATCH_SIZE]],
                               dtype='int32')

    iter_data = inf_train_gen()

    G = Generator(dim_hidden=DIM, seq_len=SEQ_LEN, dim_output=len(charmap))
    D = Discriminator(dim_hidden=DIM, seq_len=SEQ_LEN)

    for iteration in range(ITERS):

        if iteration == 1:
            G.summary()
            D.summary()

        start = time()

        cost_G = train_G(G, D, G.trainable_variables)

        for _ in range(CRITIC_ITERS):
            text = next(iter_data)
            cost_D, gp = train_D(text, charmap, G, D, D.trainable_variables)

        if iteration % 1 == 0:
            fake_inputs = G(1, SEQ_LEN)
            fake_inputs_discrete = tf.argmax(fake_inputs[0], -1)
            print(
                'cost_G: {:.3f}\t cost_D: {:.3f}|{:.3f}\t used: {:.3f}'.format(
                    cost_G, cost_D, gp,
                    time() - start))
            print(''.join(inv_charmap[i] for i in text[0]), ' || ',
                  ''.join(inv_charmap[i] for i in fake_inputs_discrete))
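The charmap / inv_charmap round trip used for printing samples, shown in isolation on a toy corpus (illustrative names, not part of the example above):

import numpy as np

toy_lines = ["hello", "world"]
charmap = {c: i for i, c in enumerate(sorted(set("".join(toy_lines))))}
inv_charmap = {i: c for c, i in charmap.items()}

encoded = np.array([[charmap[c] for c in line] for line in toy_lines], dtype='int32')
decoded = ''.join(inv_charmap[i] for i in encoded[0])
print(encoded[0], decoded)  # integer indices for "hello", then "hello" again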
Example #9
from networks.lstm_encoder_decoder import LSTMEncoderDecoder

from utils import dataset
import numpy as np

# Load dataset
params = dataset.load_dataset("input_dummy.txt", "output_dummy.txt")
input_items = params['input']
output_items = params['output']


input_vocab_size = input_items['size']
output_vocab_size = output_items['size']
input_embed_size = 100
num_input = 100
num_memory_units = 1000
num_layers = 4


Xi = input_items['sentences'][0]
Yi = output_items['sentences'][0]

encoder_decoder = LSTMEncoderDecoder(input_vocab_size, output_vocab_size, input_embed_size, input_embed_size,
                                     num_layers, num_memory_units)

# res, error = encoder_decoder.forward(Xi, Yi)
predictions = encoder_decoder.predict(Xi, beam_size=3)

# Print each beam hypothesis together with its log-probability
for predList in predictions:
    logProb = predList[0]
    prediction = predList[1]
    print(logProb, prediction)
Example #10
import os

import tensorflow as tf

seed_value = 0  # arbitrary fixed value; the original defines the seed in an earlier, elided step of this recipe
tf.set_random_seed(seed_value)
# 5. For layers that introduce randomness like dropout, make sure to set seed values
# model.add(Dropout(0.25, seed=seed_value))
# 6. Configure a new global `tensorflow` session
from keras import backend as K
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)


must_train = True
must_test = True
base_model = 'firenet_tf'
dataset = 'fismo_black' # int(time.time())
ds_folder, get_dataset = load_dataset(dataset)
save_path = os.path.join('.', 'models', 'saved', base_model, dataset)
if not os.path.exists(save_path):
    os.makedirs(save_path)

### Training
if must_train:

    ds_path = os.path.join('.', 'datasets', ds_folder)
    x_train, y_train, x_val, y_val = get_dataset(ds_path, resize=(64,64))

    # Normalize data.
    x_train = preprocess(x_train)
    x_val = preprocess(x_val)

    # summary
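The numbered comments indicate this is the tail of a common reproducibility recipe; its earlier steps usually seed Python, NumPy and the hash seed, roughly as in this sketch (arbitrary seed value, matching the TF1/Keras setup above):

import os
import random

import numpy as np

seed_value = 0  # arbitrary fixed seed

# 1. Set the PYTHONHASHSEED environment variable to a fixed value
os.environ['PYTHONHASHSEED'] = str(seed_value)
# 2. Seed Python's built-in pseudo-random generator
random.seed(seed_value)
# 3. Seed the NumPy pseudo-random generator
np.random.seed(seed_value)
# 4. The TensorFlow graph-level seed is set above via tf.set_random_seed(seed_value)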
Example #11
ap.add_argument("-c",
                "--conf",
                required=True,
                help="path to the configuration file")
ap.add_argument(
    "-n",
    "--hard-negatives",
    type=int,
    default=-1,
    help="flag indicating whether or not hard negatives should be used")
args = vars(ap.parse_args())

# load the configuration file and the initial dataset
print("[INFO] loading dataset...")
conf = Conf(args["conf"])
(data, labels) = dataset.load_dataset(conf["features_path"], "features")

# check to see if the hard negatives flag was supplied
if args["hard_negatives"] > 0:
    print("[INFO] loading hard negatives...")
    (hardData, hardLabels) = dataset.load_dataset(conf["features_path"],
                                                  "hard_negatives")
    data = np.vstack([data, hardData])
    labels = np.hstack([labels, hardLabels])

# train the classifier
print("[INFO] training classifier...")
model = SVC(kernel="linear", C=conf["C"], probability=True, random_state=42)
model.fit(data, labels)

# dump the classifier to file
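The snippet is cut off after the final comment; persisting a fitted scikit-learn classifier is typically done with pickle (or joblib), e.g. in this sketch with toy data and an illustrative path, not the original code:

import pickle

from sklearn.datasets import make_classification
from sklearn.svm import SVC

X, y = make_classification(n_samples=100, n_features=10, random_state=42)
clf = SVC(kernel="linear", C=1.0, probability=True, random_state=42).fit(X, y)

# dump the classifier to file
with open("classifier.pkl", "wb") as f:
    pickle.dump(clf, f)

# ... and load it back later
with open("classifier.pkl", "rb") as f:
    clf = pickle.load(f)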
Example #12
def evaluate():

    model = MonoNet(cfg['model'])
    model.load_weights(WEIGHTS)
    print('[info] model weights loaded.')

    dataset = load_dataset(DATASET, cfg, False)

    n_classes = cfg['model']['n_classes']

    cham_res = []
    ap = np.zeros((n_classes - 1)) if n_classes > 1 else np.zeros((1))
    ap_count = np.zeros((n_classes - 1)) if n_classes > 1 else np.zeros((1))

    for step, (inputs, label) in enumerate(dataset):

        calib = inputs['calib']

        pred_c, pred_attr, pred_clf = model(
            [inputs['img'], calib['calib'], calib['img_orig']])

        pred_c = tf.squeeze(pred_c, 0)
        pred_attr = tf.squeeze(pred_attr, 0)
        pred_clf = tf.squeeze(pred_clf, 0)

        scores = tf.expand_dims(tf.reduce_max(pred_clf, 1), -1)
        pred_clf = tf.expand_dims(tf.math.argmax(pred_clf, 1), -1)
        scores = tf.where(pred_clf != cfg['model']['n_classes'] - 1, scores,
                          tf.zeros_like(scores))

        pred_c, pred_attr, pred_clf, scores = tf_utils.objectness_mask(
            pred_c, pred_attr, pred_clf, scores, SCORE_THRESH)

        if pred_c.shape[0] == 0: continue

        if NMS:
            boxes = tf.squeeze(
                tf_utils.scenenet_to_aabb(tf.expand_dims(pred_c, 0),
                                          tf.expand_dims(pred_attr, 0)), 0)
            nms_inds = helpers.nms(boxes.numpy(), scores.numpy(),
                                   cfg['std']['max_labels'], NMS_THRESH)
            pred_c = tf.gather(pred_c, nms_inds)
            pred_attr = tf.gather(pred_attr, nms_inds)
            pred_clf = tf.gather(pred_clf, nms_inds)
            pred_boxes = tf.gather(boxes, nms_inds)
        else:
            pred_boxes = tf.squeeze(
                tf_utils.scenenet_to_aabb(tf.expand_dims(pred_c, 0),
                                          tf.expand_dims(pred_attr, 0)), 0)

        label_mask = tf.cast(label['bbox_3d'], tf.bool)[:, :, 0, 0]
        label_clf = tf.boolean_mask(label['clf'], label_mask)

        label_c = tf.boolean_mask(label['c_3d'], label_mask)
        label_attr = tf.boolean_mask(label['attr'], label_mask)
        label_boxes = tf_utils.scenenet_to_aabb(tf.expand_dims(label_c, 0),
                                                tf.expand_dims(label_attr, 0))

        cham_dist, _, _ = losses.chamfer_loss(label['c_3d'],
                                              tf.expand_dims(pred_c, 0))
        cham_res.append(cham_dist)

        res, classes = eval.ap_eval(tf.squeeze(label_boxes, 0), label_clf,
                                    pred_boxes, pred_clf, IOU_THRESH)

        for c_idx, c in enumerate(classes):
            ap[c] += res[c_idx][2]
            ap_count[c] += 1

        if step % 100 == 0 and step != 0:
            print('step: {}, Chamfer: {:.4f} AP: {:}, mAP: {:.2f}'.format(
                step, np.mean(cham_res), ap / ap_count,
                np.mean(ap / ap_count)))
            break

    print('-------------')
    print('step: {}, Chamfer: {:.4f} AP: {:}, mAP: {:.2f}'.format(
        step, np.mean(cham_res), ap / ap_count, np.mean(ap / ap_count)))

    return np.mean(ap / ap_count)
Example #13
settings = {
    'snr': 18,
    'modulations': ['8PSK', 'BPSK', 'QPSK', 'PAM4', 'GFSK', 'CPFSK', 'AM-SSB'],
    'scale': 50,
    'num_samples': 50,
    'time_sample': np.arange(128),
    'thrup': 0.1,
    'thrdn': 0.1,
    'resampling_factor': 200,
    'stretch_factor': 1000,
    'stop_after': 10000,
    'stop_neuron': 4,
    'pause': 500
}
tot_num_samples = settings['num_samples'] * len(settings['modulations'])
dataset, _ = load_dataset('./data/radioML/RML2016.10a_dict.pkl',
                          snr=settings['snr'],
                          scale=settings['scale'])
# Define delta modulators
modulator = [
    AsynchronousDeltaModulator(settings['thrup'], settings['thrdn'],
                               settings['resampling_factor']),
    AsynchronousDeltaModulator(settings['thrup'], settings['thrdn'],
                               settings['resampling_factor'])
]
# Prepare stimulus
print("- Preparing input stimulus")
indices = []
times = []
Y = []
stimulation = (len(settings['time_sample']) * settings['stretch_factor'] /
               1e3) * ms
Example #14
model = create_MNIST_CNN()
# ~ model = createModel1()
# ~ model.compile(loss='categorical_crossentropy', optimizer='SGD', metrics=['accuracy'])
model.compile(loss='categorical_crossentropy',
              optimizer=KO.Adam(lr=0.0001, epsilon=1e-8),
              metrics=['accuracy'])

# print(model.summary())

validation_split = 0.1

# Train the network
if CNN_config.use_fit_generator:
    X_train, X_test, Y_train, Y_test, validation_data = repack_dataset(
        validation_split, *load_dataset())
    # This will do preprocessing and realtime data augmentation:
    datagen = getDataGen()
    datagen.fit(X_train)
    # ~ plot_losses = TrainingPlot()
    training_log = TrainingLog()
    timer = Timer().start()
    earlyStopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1)
    modelCheckpoint = ModelCheckpoint('best_model.h5',
                                      monitor='val_acc',
                                      mode='max',
                                      verbose=1,
                                      save_best_only=True)

    model.fit_generator(
        datagen.flow(X_train, Y_train, batch_size=CNN_config.batch_size),
Example #15
def main():
    dataset = load_dataset()
Example #16
__author__ = 'saideeptalari'
import tensorflow as tf
from utils.dataset import load_dataset, plot_dataset, encode
import numpy as np

trainPath = "dataset/mnist_train.csv"
testPath = "dataset/mnist_test.csv"

#load training and testing sets
trainData, trainLabels = load_dataset(trainPath)
testData, testLabels = load_dataset(testPath)

#convert to floats
trainData = trainData.astype("float32")
testData = testData.astype("float32")

#encode labels
trainLabels = encode(trainLabels)
testLabels = encode(testLabels)
#normalize
trainData /= 255
testData /= 255

#define no.of.nodes for each layer
num_input = 784
num_hidden_1 = 256
num_hidden_2 = 256
num_output = 10

num_epochs = 20
Example #17
#       'AM-DSB', 'AM-SSB',
#       'PAM4', 'WBFM'
print("- Importing dataset")
settings = {
    'snr': 18,
    'modulations': ['8PSK', 'BPSK', 'QPSK'],
    'num_samples': 20,
    'time_sample': np.arange(128),
    'thrup': 0.01,
    'thrdn': 0.01,
    'resampling_factor': 200,
    'stretch_factor': 50,
    'pause': 5000
}
tot_num_samples = settings['num_samples']*len(settings['modulations'])
dataset = load_dataset('./data/radioML/RML2016.10a_dict.pkl', snr=settings['snr'], normalize=True)
# Define delta modulators
modulator = [
    AsynchronousDeltaModulator(settings['thrup'], settings['thrdn'], settings['resampling_factor']),
    AsynchronousDeltaModulator(settings['thrup'], settings['thrdn'], settings['resampling_factor'])
]
# Prepare stimulus
print("- Preparing input stimulus")
indices = []
times = []
Y = []
stimulation = (len(settings['time_sample'])*settings['resampling_factor']*settings['stretch_factor']/1e3)*ms
duration = (stimulation+settings['pause']*ms)*settings['num_samples']*len(settings['modulations'])
to = 0.0*ms
for (i, mod) in tqdm(enumerate(settings['modulations'])):
    for j in range(settings['num_samples']):
Example #18
path = "data/Confocal_MICE/raw/training_raw.npy"

# Load the training image
data = np.load(path).astype(np.float32)

# We are loading the histogram from the 'Convallaria-1-CreateNoiseModel' notebook
# histogram = np.load(path + 'noiseModel.npy')

# Create a NoiseModel object from the histogram.
# noiseModel = hist_noise_model.NoiseModel(histogram)


logging.config.fileConfig("configs/logging.conf")

# TODO: how to deal with the noise model being a part of the model config as opposed to something generated from the data
model_config = yaml2namespace(join('unittests', 'assets', 'ppn2v_model.yaml'))
training_config = yaml2namespace(join('configs', 'training_config.yaml'))
# data, mean, std = load_data(data, batch_size=training_config.batch_size,
#                             patch_size=training_config.patch_size,
#                             num_pix=100 * 100 // 32, supervised=False)


train_data, val_data, mean, std = load_dataset('data/test_records')
model = PPN2V(model_config, mean, std)

profiler_v2.warmup()
profiler_v2.start(logdir='model_instances/cheese')
model.train(train_data, training_config)
profiler_v2.stop()
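profiler_v2 here is TensorFlow's internal profiler module; the public API wraps the code to be profiled in tf.profiler.experimental.start/stop, as in this sketch (assumes TF 2.x, illustrative log directory and workload):

import tensorflow as tf

profile_logdir = 'model_instances/profile_demo'

tf.profiler.experimental.start(profile_logdir)
# ... the training steps to be profiled would run here ...
a = tf.random.normal((256, 256))
_ = tf.matmul(a, a)  # stand-in workload so the trace is not empty
tf.profiler.experimental.stop()
# The captured trace can then be inspected in TensorBoard's Profile tab.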
Example #19
model = create_MNIST_CNN()
model = create_MNIST_CNN1()  # the worst one
model.compile(**COMPILE_MODES.get(2))
# print(model.summary())


########################################################################
validation_split = 0.1
# earlyStopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1)
# modelCheckpoint = ModelCheckpoint('best_model.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)


# Train the network
if CNN_config.use_fit_generator:
    X_train, X_test, Y_train, Y_test, validation_data = repack_dataset(validation_split, *load_dataset())
    # This will do preprocessing and realtime data augmentation:
    datagen = getDataGen()
    datagen.fit(X_train)
    plot_losses = TrainingPlot()
    timer = Timer().start()
    # earlyStopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1)
    # modelCheckpoint = ModelCheckpoint('best_model.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

    model.fit_generator(
        datagen.flow(X_train, Y_train, batch_size=CNN_config.batch_size),
        len(X_train),
        CNN_config.epochs,
        validation_data=validation_data,
        callbacks=[
            plot_losses, 
Example #20
__author__ = 'saideeptalari'
import tensorflow as tf
from utils.dataset import load_dataset, encode
from sklearn.model_selection import train_test_split
import numpy as np

batch_size = 128
"""
trainData,trainLabels = load_dataset("dataset/mnist_train.csv")
testData,testLabels = load_dataset("dataset/mnist_test.csv")
"""
Data, Labels = load_dataset("dataset/mnist_test.csv")
trainData, testData, trainLabels, testLabels = train_test_split(Data, Labels)

trainData = trainData.astype("float32")
testData = testData.astype("float32")

trainLabels = encode(trainLabels)
testLabels = encode(testLabels)

x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])

keep_prob = tf.placeholder(tf.float32)
keep_prob1 = tf.placeholder(tf.float32)


def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
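In the classic TF1 MNIST tutorials, weight_variable is paired with a bias helper and conv/pool wrappers; the companions below are a sketch of that convention, not necessarily code from this script:

import tensorflow as tf  # same TF1-style import as above


def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)  # small positive bias to avoid dead ReLUs
    return tf.Variable(initial)


def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")


def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")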
Example #21
def train(train_csv_path, val_csv_path, model_dir):
    startTime = time()
    n_class = 102
    batch_size = 12
    capacity = 24  # maximum number of samples buffered in memory

    learning_rate_base = 1e-3  # initial learning rate
    learning_rate_decay = 0.5  # decay rate of the learning rate
    learning_rate_step = 1  # number of batches fed before the learning rate is updated; usually total samples / BATCH_SIZE

    xs, ys = dataset.load_dataset(train_csv_path)
    xs_val, ys_val = dataset.load_dataset(val_csv_path)

    images_num = len(xs)  # total number of images
    labels_num = len(ys)  # total number of labels
    batch_num = math.ceil(images_num / batch_size)  # number of batches per epoch

    images_val_num = len(xs_val)
    labels_val_num = len(ys_val)
    batch_val_num = math.floor(len(xs_val) / batch_size)  # number of validation batches per epoch

    print(
        "images:{0} labels:{1} class:{2} batch_size:{3} batch_num:{4}".format(
            images_num, labels_num, n_class, batch_size, batch_num))
    print(
        "images_val:{0} labels_val:{1} class:{2} batch_size:{3} batch_val_num:{4}"
        .format(images_val_num, labels_val_num, n_class, batch_size,
                batch_val_num))

    # load batches of images and labels from the file lists
    image_batch, label_batch = batch_pretreatment.get_batch(xs,
                                                            ys,
                                                            224,
                                                            224,
                                                            batch_size,
                                                            shuffle=False,
                                                            num_threads=1,
                                                            capacity=capacity)
    image_batch_test, label_batch_test = batch_pretreatment.get_batch(
        xs_val, ys_val, 224, 224, batch_size, shuffle=False, capacity=capacity)

    global_steps = tf.Variable(
        0, trainable=False)  # counter for how many batches have run; starts at 0 and is not trainable
    learning_rate = tf.train.exponential_decay(learning_rate_base,
                                               global_steps,
                                               learning_rate_step,
                                               learning_rate_decay,
                                               staircase=True)

    Y_hat, model_params = ResNet50(input_shape=[224, 224, 3], classes=n_class)

    # Y_hat = tf.sigmoid(Z)

    X = model_params['input']
    Y_true = tf.placeholder(dtype=tf.float32, shape=[None, n_class])

    Z = model_params['out']['Z']  # Z: logits, before the softmax layer
    A = model_params['out']['A']  # A: probabilities, after the softmax layer

    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=Z, labels=Y_true))
    # note: global_steps is never passed to minimize(), so the decayed learning rate stays at learning_rate_base
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    correct_prediction = tf.equal(tf.argmax(A, 1), tf.argmax(Y_true, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    saver = tf.train.Saver(max_to_keep=10)

    with tf.Session() as sess:
        try:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())

            if not os.path.exists(model_dir):
                os.mkdir(model_dir)

            # if a checkpoint file exists, load the latest one and restore the variables
            ckpt = tf.train.latest_checkpoint(model_dir)
            if ckpt is not None:
                print("Loading last checkpoint file ...")
                saver.restore(sess, ckpt)
                print("Checkpoint file loading complete!")
                # all parameters are now loaded; from here the model can be used for prediction, or training can continue
            else:
                print("There is no checkpoint file that can be loaded!ss")

            coord = tf.train.Coordinator()  # use a Coordinator to manage the input threads
            threads = tf.train.start_queue_runners(coord=coord, sess=sess)

            epoch_start_time = time()
            print("Begin train...")
            epoch = 0
            while epoch < 100:
                epoch += 1
                for batch in range(1, batch_num):
                    images, labels = sess.run([image_batch, label_batch])
                    labels = batch_pretreatment.onehot(
                        labels, n_class=n_class)  # one-hot encode the labels

                    if batch % 10 == 0:
                        _, l, acc = sess.run([train_step, loss, accuracy],
                                             feed_dict={
                                                 X: images,
                                                 Y_true: labels
                                             })

                        epoch_end_time = time()
                        print(
                            "epoch:{0} batch:{1} loss:{2} accuracy:{3} takes:{4}"
                            .format(epoch, batch, l, acc,
                                    epoch_end_time - epoch_start_time))
                        epoch_start_time = epoch_end_time
                    else:
                        sess.run(train_step,
                                 feed_dict={
                                     X: images,
                                     Y_true: labels
                                 })
                    if batch % (batch_num // 2) == 0:
                        val_loss = []
                        val_acc = []
                        for i in range(batch_val_num):
                            images_test, labels_test = sess.run(
                                [image_batch_test, label_batch_test])
                            labels_test = batch_pretreatment.onehot(
                                labels_test,
                                n_class=n_class)  # one-hot encode the labels

                            l, acc = sess.run([loss, accuracy],
                                              feed_dict={
                                                  X: images_test,
                                                  Y_true: labels_test
                                              })
                            val_loss.append(l)
                            val_acc.append(acc)

                        print("val_loss:{0} acc:{1}".format(
                            sum(val_loss) / batch_val_num,
                            sum(val_acc) / batch_val_num))

                        saver.save(sess,
                                   os.path.join(
                                       model_dir,
                                       'epoch-{0}-batch-{1}.ckpt'.format(
                                           epoch, batch)),
                                   global_step=epoch)
                        print(
                            "-" * 20,
                            'Model:epoch-{0}-batch-{1}.ckpt saved successfully'
                            .format(epoch, batch), "-" * 20)

            saver.save(sess, os.path.join(model_dir, 'flower_model.ckpt'))
            print("Optimization Finished!")

            coord.request_stop()  # tell the other threads to stop
            coord.join(threads)  # join waits for all other threads to finish before returning

            duration = time() - startTime
            print("Train Finished takes:", "{:.2f}".format(duration))
        except Exception as ex:
            print(ex)
        finally:
            saver.save(sess, os.path.join(model_dir, 'flower_model.ckpt'))
            sess.close()
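batch_pretreatment.onehot and batch_pretreatment.get_batch are project-specific helpers; a minimal NumPy stand-in for the one-hot encoder used above (a sketch, not the original implementation):

import numpy as np


def onehot(labels, n_class):
    """One-hot encode a vector of integer class labels."""
    labels = np.asarray(labels).reshape(-1)
    encoded = np.zeros((labels.shape[0], n_class), dtype=np.float32)
    encoded[np.arange(labels.shape[0]), labels] = 1.0
    return encoded


print(onehot([0, 2, 1], n_class=3))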
Example #22
def main():

    n_classes = 200
    img_shape = (64, 64, 3)

    timed_wrapper = TimedWrapper(
        load_model())  # Measure model prediction runtime.
    remote_wrapper = RemoteModelWrapper(timed_wrapper,
                                        do_hash=True)  # Remember best adv. ex.

    with SampleGenerator(shape=img_shape, n_threads=1,
                         queue_lengths=100) as sample_gen:

        X_train, y_train, X_val, y_val = load_dataset(
            "/path/to/tiny/imagenet", ds_cache_path='tiny_imagenet_cached.npz')

        with MultiBoundaryAttack(model=remote_wrapper,
                                 X_known=np.vstack([X_train, X_val]),
                                 y_known=np.concatenate([y_train, y_val]),
                                 n_classes=n_classes,
                                 sample_gen=sample_gen,
                                 cached_conf=None) as attack:

            model_mean_query_time_history = []
            time_max = 89  # As allowed in the rules (batch of 10 in 900 seconds)
            time_bonus = 0  # Bonus to account for unfair models (see below)

            i = 0
            for (file_name, image, label) in read_images():

                time_start = default_timer()

                # Time calculation: 90 seconds per image are allowed. Models are allowed to use (40ms*1000calls) = 40s.
                # This leaves 50 seconds for the attacker.
                #
                # But if the model is MUCH slower than allowed, then the attacker has less time and can't finish.
                # To balance the scales, we detect this, and allow ourselves to use up some extra seconds.
                # If we don't do this (and hard-abort at 90 seconds), attacks that don't count time would have an advantage vs us.
                if i % 5 == 0 and len(model_mean_query_time_history) > 3:
                    avg_model_time = np.mean(model_mean_query_time_history)
                    if avg_model_time > 55e-3:
                        time_left_for_attacker = 89 - (1000 * avg_model_time)
                        time_bonus = min(55 - time_left_for_attacker, 50)
                        print(
                            "Model is slower than allowed (would leave only {:.1f} seconds for the attacker). "
                            "Will now use up to {:.1f} additional seconds per image."
                            .format(time_left_for_attacker, time_bonus))
                    elif time_bonus > 0:
                        time_bonus = 0
                        print(
                            "Model speed seems OK now. Reverting to the 90s time limit."
                        )

                print("Image {}:".format(i))
                image = np.float32(image)

                remote_wrapper.adv_set_target(orig_img=image,
                                              is_targeted=True,
                                              label=label)
                attack.run_attack(image=image,
                                  label=label,
                                  is_targeted=True,
                                  start_with_fgm=True,
                                  fgm_acceptable_dist=10,
                                  time_max=time_max + time_bonus)
                safe_adversarial = remote_wrapper.adv_get_best_img()

                if safe_adversarial is None:
                    safe_adversarial = np.uint8(image)
                    print("Couldn't find an adversarial! This sucks!")
                else:
                    dist = util.eval_distance(image, safe_adversarial)
                    print("Final distance: {}".format(dist))

                # Save model query time stats.
                rt_median, rt_mean, rt_std = timed_wrapper.get_runtime_stats()
                print(
                    "Response time of model: median={:.1f}ms, mean={:.1f}ms, std={:.1f}ms"
                    .format(rt_median * 1e3, rt_mean * 1e3, rt_std * 1e3))
                timed_wrapper.reset_runtime_stats()
                if remote_wrapper.adv_get_n_calls() > 100:
                    model_mean_query_time_history.append(rt_mean)

                time_elapsed_s = default_timer() - time_start
                print("Queried the model {} times.".format(
                    remote_wrapper.adv_get_n_calls()))
                print("Attack for this image took {} seconds.".format(
                    time_elapsed_s))
                print()

                store_adversarial(file_name, safe_adversarial)
                i += 1

            attack_complete()
Example #23
def main():
    parser = argparse.ArgumentParser()
    # parameters
    parser.add_argument("--epoch",
                        default=100,
                        type=int,
                        help="the number of epoches needed to train")
    parser.add_argument("--lr",
                        default=1e-3,
                        type=float,
                        help="the learning rate")
    parser.add_argument("--train_data_path",
                        default='data/train.tsv',
                        type=str,
                        help="train dataset path")
    parser.add_argument("--dev_data_path",
                        default=None,
                        type=str,
                        help="dev dataset path")
    parser.add_argument("--test_data_path",
                        default='data/test.tsv',
                        type=str,
                        help="test dataset path")
    parser.add_argument("--train_batch_size",
                        default=128,
                        type=int,
                        help="the batch size")
    parser.add_argument("--dev_batch_size",
                        default=64,
                        type=int,
                        help="the batch size")
    parser.add_argument("--test_batch_size",
                        default=64,
                        type=int,
                        help="the batch size")
    parser.add_argument("--embedding_path",
                        default='data/sgns.renmin.bigram-char',
                        type=str,
                        help="pre-trained word embeddings path")
    parser.add_argument("--embedding_size",
                        default=300,
                        type=int,
                        help="the word embedding size")
    parser.add_argument("--hidden_size",
                        default=512,
                        type=int,
                        help="the hidden size")
    parser.add_argument("--fine_tuning",
                        default=True,
                        type=bool,
                        help="whether fine-tune word embeddings")
    parser.add_argument("--early_stopping",
                        default=15,
                        type=int,
                        help="Tolerance for early stopping (# of epochs).")
    parser.add_argument("--load_model",
                        default='results/20_Model_best.pt',
                        help="load pretrained model for testing")
    args = parser.parse_args()

    if not args.train_data_path:
        logger.info("please input train dataset path")
        exit()
    if not (args.dev_data_path or args.test_data_path):
        logger.info("please input dev or test dataset path")
        exit()

    TEXT, LABEL, vocab_size, word_embeddings, train_iter, dev_iter, test_iter, tag_dict = \
        dataset.load_dataset(args.train_data_path, args.dev_data_path,
                             args.test_data_path, args.embedding_path, args.train_batch_size,
                             args.dev_batch_size, args.test_batch_size)

    idx_tag = {}
    for tag in tag_dict:
        idx_tag[tag_dict[tag]] = tag

    model = BiLSTM_CRF(args.embedding_size, args.hidden_size, vocab_size,
                       tag_dict, word_embeddings)
    if torch.cuda.is_available():
        model = model.cuda()

    # cost_test = []
    # start = time.perf_counter()
    # train_dev_size = len(train_iter)
    # train_size = int(train_dev_size*0.9)
    train_data, dev_data = dataset.train_dev_split(train_iter, 0.9)
    # for batch in train_data:
    #     print(batch)
    #     exit()

    # train_data = lambda: islice(train_iter,0,train_size)
    # dev_data = lambda: islice(train_iter,train_size,train_dev_size)
    # train_data = islice(train_iter,0,train_size)
    # dev_data = islice(train_iter,train_size,train_dev_size)
    if args.load_model:
        model.load_state_dict(torch.load(args.load_model, map_location='cpu'))
        # p, r, f1, eval_loss, all_assess = eval_model(model, dev_data, idx_tag)
        # logger.info('Eval Loss:%.4f, Eval P:%.4f, Eval R:%.4f, Eval F1:%.4f', \
        #                             eval_loss, p, r, f1)
        p, r, f1, eval_loss, all_assess = eval_model(model, test_iter, idx_tag)
        logger.info('LOC Test P:%.4f, Test R:%.4f, Test F1:%.4f', \
                all_assess['LOC']['P'], all_assess['LOC']['R'], all_assess['LOC']['F'])
        logger.info('PER Test P:%.4f, Test R:%.4f, Test F1:%.4f', \
                all_assess['PER']['P'], all_assess['PER']['R'], all_assess['PER']['F'])
        logger.info('ORG Test P:%.4f, Test R:%.4f, Test F1:%.4f', \
                all_assess['ORG']['P'], all_assess['ORG']['R'], all_assess['ORG']['F'])
        logger.info('Micro_AVG Test P:%.4f, Test R:%.4f, Test F1:%.4f', \
                                    p, r, f1)
        return

    best_score = 0.0
    for epoch in range(args.epoch):
        # train_data_ = copy.deepcopy(train_data)
        # dev_data_ = copy.deepcopy(dev_data)
        # train_model(model, train_data_, dev_data_, epoch, args.lr, idx_tag)
        train_loss, p, r, f1, eval_loss = train_model(model, train_data,
                                                      dev_data, epoch, args.lr,
                                                      idx_tag)

        logger.info('Epoch:%d, Training Loss:%.4f', epoch, train_loss)
        logger.info('Epoch:%d, Eval Loss:%.4f, Eval P:%.4f, Eval R:%.4f, Eval F1:%.4f', \
                                    epoch, eval_loss, p, r, f1)
        # p, r, f1, eval_loss, all_assess = eval_model(model,  test_iter, idx_tag)
        # logger.info('Test Loss:%.4f, Test P:%.4f, Test R:%.4f, Test F1:%.4f', \
        #                             eval_loss, p, r, f1)
        if f1 > best_score:
            best_score = f1
            torch.save(
                model.state_dict(),
                'results/%d_%s_%s.pt' % (epoch, 'Model', str(best_score)))
Example #24
batch_size = 128
nb_classes = 10
nb_epoch = 12

# input image dimensions
img_rows, img_cols = 28, 28
# number of convolutional filters to use
nb_filters = 32
# size of pooling area for max pooling
pool_size = (2, 2)
# convolution kernel size
kernel_size = (3, 3)

#load training data
trainData, trainLabels = load_dataset("dataset/mnist_train.csv")
trainLabels = encode(trainLabels)

#load testing data
testData, testLabels = load_dataset("dataset/mnist_test.csv")
testLabels = encode(testLabels)

#convert to float
trainData = trainData.astype("float32")
testData = testData.astype("float32")

#refactoring train and test sets
X_train, Y_train, X_test, Y_test = trainData, trainLabels, testData, testLabels

#normalize data
X_train /= 255
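The constants above match the classic Keras MNIST CNN example; the model they typically feed looks roughly like this sketch (redefining the same values for self-containment; not necessarily the truncated original):

from tensorflow import keras
from tensorflow.keras import layers

nb_filters, kernel_size, pool_size = 32, (3, 3), (2, 2)  # same values as above
img_rows, img_cols, nb_classes = 28, 28, 10

model = keras.Sequential([
    layers.Conv2D(nb_filters, kernel_size, activation='relu',
                  input_shape=(img_rows, img_cols, 1)),
    layers.Conv2D(nb_filters, kernel_size, activation='relu'),
    layers.MaxPooling2D(pool_size=pool_size),
    layers.Dropout(0.25),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(nb_classes, activation='softmax'),
])
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
print(model.output_shape)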
Example #25
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument(
        "--model_name_or_path",
        default=None,
        type=str,
        required=True,
        help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(BERT_MODELS),
    )
    parser.add_argument(
        "--output_dir",
        default=None,
        type=str,
        required=True,
        help="The output directory where the model predictions and checkpoints will be written.",
    )

    # Other parameters
    parser.add_argument(
        "--train_path",
        default="",
        type=str,
        help="Path to training dataset (.csv file).",
    )
    parser.add_argument(
        "--eval_path",
        default="",
        type=str,
        help="Path to evaluation dataset (.csv file).",
    )
    parser.add_argument(
        "--cache_dir",
        default="",
        type=str,
        help="Cache directory to store the pre-trained models downloaded from s3.",
    )
    parser.add_argument(
        "--max_seq_length",
        default=128,
        type=int,
        help="The maximum total input sequence length after tokenization. Sequences longer "
             "than this will be truncated, sequences shorter will be padded.",
    )

    parser.add_argument(
        "--per_gpu_train_batch_size",
        default=8,
        type=int,
        help="Batch size per GPU/CPU for training."
    )
    parser.add_argument(
        "--eval_batch_size",
        default=8,
        type=int,
        help="Batch size per GPU/CPU for evaluation."
    )
    parser.add_argument(
        "--gradient_accumulation_steps",
        default=1,
        type=int,
        help="Number of updates steps to accumulate before performing a backward/update pass."
    )
    parser.add_argument(
        "--learning_rate",
        default=5e-5,
        type=float,
        help="The initial learning rate for Adam."
    )
    parser.add_argument(
        "--weight_decay",
        default=0.0,
        type=float,
        help="Weight deay if we apply some."
    )
    parser.add_argument(
        "--adam_epsilon",
        default=1e-8,
        type=float,
        help="Epsilon for Adam optimizer."
    )
    parser.add_argument(
        "--max_grad_norm",
        default=1.0,
        type=float,
        help="Max gradient norm."
    )
    parser.add_argument(
        "--num_train_epochs",
        default=3,
        type=int,
        help="Total number of training epochs to perform."
    )
    parser.add_argument(
        "--max_steps",
        default=-1,
        type=int,
        help="If > 0: set total number of training steps to perform. Override num_train_epochs."
    )
    parser.add_argument(
        "--warmup_steps",
        default=0,
        type=int,
        help="Linear warmup over warmup_steps."
    )

    parser.add_argument(
        "--logging_steps",
        default=100,
        type=int,
        help="Log every X updates steps."
    )
    parser.add_argument(
        "--save_steps",
        default=2000,
        type=int,
        help="Save checkpoint every X updates steps."
    )
    parser.add_argument(
        "--no_cuda",
        action="store_true",
        help="Avoid using CUDA when available"
    )
    parser.add_argument(
        "--overwrite_output_dir",
        action="store_true",
        help="Overwrite the content of the output directory"
    )
    parser.add_argument(
        "--overwrite_cache",
        action="store_true",
        help="Overwrite the cached training sets"
    )
    parser.add_argument(
        "--seed",
        default=42,
        type=int,
        help="random seed for initialization"
    )

    parser.add_argument(
        "--do_train",
        action="store_true",
        help="Whether to run training on train set."
    )
    parser.add_argument(
        "--do_eval",
        action="store_true",
        help="Whether to run evaluation on dev/test set."
    )
    parser.add_argument(
        "--evaluate_during_training",
        action="store_true",
        help="Run evaluation during training at each logging step."
    )

    parser.add_argument(
        "--fp16",
        action="store_true",
        help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit",
    )
    parser.add_argument(
        "--fp16_opt_level",
        type=str,
        default="O1",
        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']."
             "See details at https://nvidia.github.io/apex/amp.html",
    )
    parser.add_argument(
        "--local_rank",
        default=-1,
        type=int,
        help="For distributed training: local_rank"
    )
    parser.add_argument(
        "--server_ip",
        default="",
        type=str,
        help="For distant debugging."
    )
    parser.add_argument(
        "--server_port",
        default="",
        type=str,
        help="For distant debugging."
    )
    args = parser.parse_args()

    if (
            os.path.exists(args.output_dir)
            and os.listdir(args.output_dir)
            and args.do_train
            and not args.overwrite_output_dir
    ):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome.".format(
                args.output_dir
            )
        )
    elif not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
        os.makedirs(args.output_dir)

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd

        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port), redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:  # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )
    logger.info("Training/evaluation parameters %s", args)

    # Set seed
    set_seed(args)

    # Training
    if args.do_train:
        # Load pretrained model and tokenizer
        model, tokenizer = get_model_and_tokenizer(args)

        # Calculate batch size for data loader
        batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)

        def _get_dataloader(_train_dataset, _collate_fn):
            train_sampler = RandomSampler(_train_dataset) if args.local_rank == -1 \
                else DistributedSampler(_train_dataset)

            return DataLoader(
                _train_dataset,
                sampler=train_sampler,
                batch_size=batch_size,
                collate_fn=_collate_fn
            )

        # fine-tune on gloss selection task
        logger.info("\nTraining...")
        train_dataset = load_dataset(args.train_path, tokenizer, args.max_seq_length)
        train_dataloader = _get_dataloader(train_dataset, collate_batch)

        global_step, tr_loss = train(args, model, tokenizer, train_dataloader, args.evaluate_during_training)
        logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)

        # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
        if args.local_rank == -1 or torch.distributed.get_rank() == 0:
            logger.info("Saving model checkpoint to %s", args.output_dir)
            # Save a trained model, configuration and tokenizer using `save_pretrained()`.
            # They can then be reloaded using `from_pretrained()`
            model_to_save = (
                model.module if hasattr(model, "module") else model
            )  # Take care of distributed/parallel training
            model_to_save.save_pretrained(args.output_dir)
            tokenizer.save_pretrained(args.output_dir)

            # Good practice: save your training arguments together with the trained model
            torch.save(args, os.path.join(args.output_dir, "training_args.bin"))

    # Evaluation
    if args.do_eval and args.local_rank in [-1, 0]:
        # Load fine-tuned model and vocabulary
        model = BertWSD.from_pretrained(args.output_dir)
        tokenizer = BertTokenizer.from_pretrained(args.output_dir)
        model.to(args.device)

        eval_loss = evaluate(args, model, tokenizer)
        print(f"Evaluation loss: {eval_loss}")