Example #1
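# Assumed context (not shown in this excerpt): `sys`, `torch`, and `numpy as np` are
# imported, and CEModel, FeatEnc, ClassificationModel, get_sheet_names,
# get_sheet_tarr, and get_feature_array come from the surrounding project.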
def main(fname, ce_model_path, fe_model_path, cl_model_path, w2v_path,
         vocab_size, infersent_source, infersent_model):
    sys.path.append(infersent_source)
    from helpers import Preprocess, SentEnc, label2ind
    from test_cl import predict_labels

    mode = 'ce+f'
    device = 'cpu'
    ce_dim = 512
    senc_dim = 4096
    window = 2
    f_dim = 43
    fenc_dim = 40
    n_classes = 6
    if device != 'cpu': torch.cuda.set_device(device)

    ce_model = CEModel(senc_dim, ce_dim // 2, window * 4)
    ce_model = ce_model.to(device)
    fe_model = FeatEnc(f_dim, fenc_dim)
    fe_model = fe_model.to(device)
    cl_model = ClassificationModel(ce_dim + fenc_dim, n_classes).to(device)

    ce_model.load_state_dict(torch.load(ce_model_path, map_location=device))
    fe_model.load_state_dict(torch.load(fe_model_path, map_location=device))
    cl_model.load_state_dict(torch.load(cl_model_path, map_location=device))

    label2ind = [
        'attributes', 'data', 'header', 'metadata', 'derived', 'notes'
    ]
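    # note: despite its name, this list maps index -> label for display
    # (it shadows the label2ind imported from helpers above)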

    print('loading word vectors...')
    senc = SentEnc(infersent_model,
                   w2v_path,
                   vocab_size,
                   device=device,
                   hp=False)
    prep = Preprocess()

    snames = get_sheet_names(fname, file_type='xls')

    result = dict()
    for sname in snames:
        tarr, n, m = get_sheet_tarr(fname, sname, file_type='xls')
        ftarr = get_feature_array(fname, sname, file_type='xls')
        table = dict(table_array=tarr, feature_array=ftarr)

        sentences = set()
        for row in tarr:
            for c in row:
                sentences.add(c)
        senc.cache_sentences(list(sentences))

        labels, probs = predict_labels(table, cl_model, ce_model, fe_model,
                                       senc, mode, device)
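        # predict_labels evidently returns log-probabilities (the classifier is
        # trained with NLLLoss over log-softmax outputs); exp() recovers probabilities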
        probs = np.exp(probs)
        labels = np.vectorize(lambda x: label2ind[x])(labels)
        result[sname] = dict(text=tarr.tolist(),
                             labels=labels.tolist(),
                             labels_probs=probs.tolist())
    return result
Example #2
def run_classification_model(model_path, image_path):

    # [560, 560] for ensemble, [560] or [448] for single models like ResNeXt
    model_input_sizes = [560, 560]

    model = ClassificationModel(model_path, model_input_sizes, useGPU=True)
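    # predict_image(path, k) is assumed to return the top-k species labels
    # and their likelihoods as parallel lists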
    species, vals = model.predict_image(image_path, 3)

    for i, (sp, val) in enumerate(zip(species, vals), start=1):
        print('%d) %s\tlikelihood: %f' % (i, sp, val))
Example #3
def loadModel(cnnDir):
    model = ClassificationModel(len(classAssoc_inv),
                                args.backbone,
                                pretrained=True,
                                convertToInstanceNorm=False)
    startEpoch = 0
    modelStates = glob.glob(os.path.join(cnnDir, '*.pth'))
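    # resume from the highest-numbered checkpoint (state files are named '<epoch>.pth')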
    for sf in modelStates:
        epoch, _ = os.path.splitext(os.path.basename(sf))
        startEpoch = max(startEpoch, int(epoch))
    
    if startEpoch > 0:
        state = torch.load(open(os.path.join(cnnDir, str(startEpoch)+'.pth'), 'rb'), map_location=lambda storage, loc: storage)
        model.load_state_dict(state['model'])
        print('Loaded model epoch {}.'.format(startEpoch))
    else:
        state = {
            'model': None,
            'loss_train': [],
            'loss_val': [],
            'oa_train': [],
            'oa_val': []
        }
        print('Initialized new model.')
    model.to(args.device)
    return model, state, startEpoch
Example #4
File: 3_test.py  Project: bkellenb/da-dl4eo
def loadModel(baseline=False):
    if baseline:
        modelDir = cnnDir_baseline
    else:
        modelDir = cnnDir

    model = ClassificationModel(len(classAssoc_inv),
                                args.backbone,
                                pretrained=True,
                                convertToInstanceNorm=False)
    startEpoch = 0
    modelStates = glob.glob(os.path.join(modelDir, '*.pth'))
    for sf in modelStates:
        epoch, _ = os.path.splitext(os.path.basename(sf))
        startEpoch = max(startEpoch, int(epoch))
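    # note: unlike Example #3, there is no startEpoch > 0 guard here, so at
    # least one checkpoint is assumed to exist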
    
    state = torch.load(open(os.path.join(modelDir, str(startEpoch)+'.pth'), 'rb'), map_location=lambda storage, loc: storage)
    model.load_state_dict(state['model'])
    print('Loaded model epoch {}.'.format(startEpoch))

    model.to(args.device)
    return model
Example #5
# (reconstructed) the excerpt starts mid-script; the argparse setup and a
# --height argument are assumed to mirror --width, since args.height is used below
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--height', type=int, default=224,
					help='Height of the input')

parser.add_argument('--width', type=int, default=224,
					help='Width of the input')

parser.add_argument('--in_channels', type=int, default=3,
					help='Size of the input')

parser.add_argument('--n_measures', type=int, default=10,
					help='Number of time measurements')

args = parser.parse_args()


#------------------------------------------------------------------------------
#	Create model
#------------------------------------------------------------------------------
model = ClassificationModel('efficientnet_b0', num_classes=100, pretrained=True)
model.eval()
model.summary(input_shape=(args.in_channels, args.height, args.width), device='cpu')


# #------------------------------------------------------------------------------
# #   Measure time
# #------------------------------------------------------------------------------
# input = torch.randn([1, args.in_channels, args.height, args.width], dtype=torch.float)
# if args.use_cuda:
# 	model.cuda()
# 	input = input.cuda()

# for _ in range(10):
# 	model(input)
Example #6
def get_model(model_path):
    '''Takes the model path and returns the model.'''
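    # IMAGE_SIZES is assumed to be a module-level constant of input sizes
    # (cf. model_input_sizes in Example #2)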
    return ClassificationModel(model_path,
                               image_sizes=IMAGE_SIZES,
                               useGPU=True)
Example #7
def evaluating(config, save_path, GPUs=0):
    logger.info("==> Start evaluating.........")
    if isinstance(GPUs, list):
        logger.warning("We use the fisrt gpu for evaluating")
        GPUs = [GPUs[0]]
    elif isinstance(GPUs, int):
        GPUs = [GPUs]
    else:
        raise RuntimeError("Check GPUs for evaluate")
    config.num_gpus = 1

    with tf.Graph().as_default():
        logger.info('==> Preparing datasets...')
        dataset = ModelNetDataset(config, config.num_threads)
        flat_inputs = dataset.flat_inputs
        val_init_op = dataset.val_init_op
        val_vote_init_op = dataset.val_vote_init_op

        is_training_pl = tf.placeholder(tf.bool, shape=())

        ClassificationModel(flat_inputs[0], is_training_pl, config=config)
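        # the bare call above builds the shared variables once; the per-GPU
        # tower below reuses them via reuse=True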
        tower_logits = []
        tower_labels = []
        tower_object_inds = []
        for i, igpu in enumerate(GPUs):
            with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                with tf.device('/gpu:%d' % (igpu)), tf.name_scope(
                        'gpu_%d' % (igpu)) as scope:
                    flat_inputs_i = flat_inputs[i]
                    model = ClassificationModel(flat_inputs_i,
                                                is_training_pl,
                                                config=config)
                    logits = model.logits
                    labels = model.labels
                    tower_logits.append(logits)
                    tower_labels.append(labels)
                    object_inds = model.inputs['object_inds']
                    tower_object_inds.append(object_inds)

        save_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                      scope='ClassificationModel')
        saver = tf.train.Saver(save_vars)

        # Create a session
        tfconfig = tf.ConfigProto()
        tfconfig.gpu_options.allow_growth = True
        tfconfig.allow_soft_placement = True
        tfconfig.log_device_placement = False
        sess = tf.Session(config=tfconfig)

        ops = {
            'val_init_op': val_init_op,
            'val_vote_init_op': val_vote_init_op,
            'is_training_pl': is_training_pl,
            'tower_logits': tower_logits,
            'tower_labels': tower_labels,
            'tower_object_inds': tower_object_inds
        }

        # Load the pretrained model
        init = tf.global_variables_initializer()
        sess.run(init)
        saver.restore(sess, save_path)
        logger.info("==> Model loaded in file: %s" % save_path)

        # Evaluating
        logger.info("==> Evaluating Last epoch")
        val_one_epoch(sess, ops, dataset, 'LastEpoch')
        val_vote_one_epoch(sess, ops, dataset, 'LastEpoch', num_votes=100)

    return
Example #8
def training(config):
    with tf.Graph().as_default():
        # Get dataset
        logger.info('==> Preparing datasets...')
        dataset = ModelNetDataset(config, config.num_threads)
        config.num_classes = dataset.num_classes
        print("==> config.num_classes: {}".format(config.num_classes))
        flat_inputs = dataset.flat_inputs
        train_init_op = dataset.train_init_op
        val_init_op = dataset.val_init_op
        val_vote_init_op = dataset.val_vote_init_op

        is_training_pl = tf.placeholder(tf.bool, shape=())

        # Set learning rate and optimizer
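        # linear scaling rule: the base LR is scaled by the global batch size
        # (batch_size * num_gpus) relative to a reference batch of 16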
        lr_scheduler = StepScheduler(
            'learning_rate', config.base_learning_rate * config.batch_size *
            config.num_gpus / 16.0, config.decay_rate, config.decay_epoch,
            config.max_epoch)
        learning_rate = tf.get_variable(
            'learning_rate', [],
            initializer=tf.constant_initializer(config.base_learning_rate *
                                                config.batch_size *
                                                config.num_gpus / 16.0),
            trainable=False)
        if config.optimizer == 'sgd':
            optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                   momentum=config.momentum)
        elif config.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate)
        elif config.optimizer == 'adamW':
            optimizer = AdamWeightDecayOptimizer(
                learning_rate=learning_rate,
                weight_decay_rate=config.weight_decay,
                exclude_from_weight_decay=["bias"])
        else:
            raise ValueError('Unsupported optimizer: {}'.format(config.optimizer))

        # -------------------------------------------
        # Get model and loss on multiple GPU devices
        # -------------------------------------------
        # Allocating variables on CPU first will greatly accelerate multi-gpu training.
        # Ref: https://github.com/kuza55/keras-extras/issues/21
        ClassificationModel(flat_inputs[0], is_training_pl, config=config)
        tower_grads = []
        tower_logits = []
        tower_labels = []
        tower_object_inds = []
        total_loss_gpu = []
        total_classification_loss_gpu = []
        total_weight_loss_gpu = []
        for i, igpu in enumerate(config.gpus):
            with tf.variable_scope(tf.get_variable_scope(), reuse=True):
                with tf.device('/gpu:%d' % (igpu)), tf.name_scope(
                        'gpu_%d' % (igpu)) as scope:
                    flat_inputs_i = flat_inputs[i]
                    model = ClassificationModel(flat_inputs_i,
                                                is_training_pl,
                                                config=config)
                    logits = model.logits
                    labels = model.labels
                    model.get_loss()
                    losses = tf.get_collection('losses', scope)
                    weight_losses = tf.get_collection('weight_losses', scope)
                    classification_losses = tf.get_collection(
                        'classification_losses', scope)
                    total_loss = tf.add_n(losses, name='total_loss')
                    total_weight_loss = tf.add_n(weight_losses,
                                                 name='total_weight_loss')
                    total_classification_loss = tf.add_n(
                        classification_losses,
                        name='total_classification_loss')
                    grad_var_list = tf.trainable_variables()
                    if config.optimizer == 'adamW':
                        grads = tf.gradients(total_classification_loss,
                                             grad_var_list)
                    else:
                        grads = tf.gradients(total_loss, grad_var_list)
                    grads = list(zip(grads, grad_var_list))
                    tower_grads.append(grads)
                    tower_logits.append(logits)
                    tower_labels.append(labels)
                    object_inds = model.inputs['object_inds']
                    tower_object_inds.append(object_inds)
                    total_loss_gpu.append(total_loss)
                    total_classification_loss_gpu.append(
                        total_classification_loss)
                    total_weight_loss_gpu.append(total_weight_loss)

        # Average losses from multiple GPUs
        total_loss = tf.reduce_mean(total_loss_gpu)
        total_classification_loss = tf.reduce_mean(
            total_classification_loss_gpu)
        total_weight_loss = tf.reduce_mean(total_weight_loss_gpu)

        # Get training operator
        grads = average_gradients(tower_grads, grad_norm=config.grad_norm)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.apply_gradients(grads)

        # Add ops to save and restore all the variables.
        save_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                      scope='ClassificationModel')
        saver = tf.train.Saver(save_vars)

        # Create a session
        tfconfig = tf.ConfigProto()
        tfconfig.gpu_options.allow_growth = True
        tfconfig.allow_soft_placement = True
        tfconfig.log_device_placement = False
        sess = tf.Session(config=tfconfig)

        # Initialize variables, resume if needed
        if config.load_path is not None:
            init = tf.global_variables_initializer()
            sess.run(init)
            saver.restore(sess, config.load_path)
            logger.info("==> Model loaded in file: %s" % config.load_path)
        else:
            # Init variables
            init = tf.global_variables_initializer()
            sess.run(init)
            logger.info("==> Init global")

        # Printing model parameters
        all_params = [
            v for v in tf.trainable_variables() if 'weights' in v.name
        ]
        logger.info("==> All params")
        for param in all_params:
            logger.info(str(param))
        all_params_size = tf.reduce_sum(
            [tf.reduce_prod(v.shape) for v in all_params])
        all_params_size_np = sess.run(all_params_size)
        logger.info(
            "==> Model have {} total Params".format(all_params_size_np))

        ops = {
            'train_init_op': train_init_op,
            'val_init_op': val_init_op,
            'val_vote_init_op': val_vote_init_op,
            'is_training_pl': is_training_pl,
            'tower_logits': tower_logits,
            'tower_labels': tower_labels,
            'tower_object_inds': tower_object_inds,
            'loss': total_loss,
            'classification_loss': total_classification_loss,
            'weight_loss': total_weight_loss,
            'train_op': train_op,
            'learning_rate': learning_rate
        }

        best_acc = 0
        best_epoch = 0
        best_vote_acc = 0
        best_vote_epoch = 0
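        # main loop: step the LR schedule, train one epoch, validate, and
        # periodically vote-validate and checkpoint the best model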
        for epoch in range(1, config.max_epoch + 1):
            lr = lr_scheduler.step()
            tic1 = time.time()
            train_one_epoch(sess, ops, epoch, lr)
            tic2 = time.time()
            logger.info(
                "Epoch: {}, total time: {:2f}s, learning rate: {:.5f}, "
                "best acc: {:3%}/{}, best vote acc: {:3%}/{}".format(
                    epoch, tic2 - tic1, lr, best_acc, best_epoch,
                    best_vote_acc, best_vote_epoch))
            logger.info("==> Validating...")
            acc, cls_acc = val_one_epoch(sess, ops, dataset, epoch)
            best_acc = max(best_acc, acc)
            best_epoch = epoch if (best_acc == acc) else best_epoch
            if epoch % config.val_freq == 0:
                logger.info("==> Voting Validating...")
                vote_acc, vote_cls_acc = val_vote_one_epoch(sess,
                                                            ops,
                                                            dataset,
                                                            epoch,
                                                            num_votes=10)
                if vote_acc > best_vote_acc:
                    best_vote_acc = vote_acc
                    best_vote_epoch = epoch
                    save_path = saver.save(sess,
                                           os.path.join(
                                               config.log_dir, "best.ckpt"),
                                           global_step=epoch)
                    logger.info(
                        "==> Model saved in file: {}".format(save_path))

            if epoch % config.save_freq == 0:
                save_path = saver.save(sess,
                                       os.path.join(config.log_dir,
                                                    "model.ckpt"),
                                       global_step=epoch)
                logger.info("==> Model saved in file: {}".format(save_path))
        epoch += 1
        val_vote_one_epoch(sess, ops, dataset, epoch, num_votes=10)
        save_path = saver.save(sess,
                               os.path.join(config.log_dir, "model.ckpt"),
                               global_step=epoch)
        logger.info("==> Model saved in file: {}".format(save_path))

    return save_path
Example #9
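# Assumed context (not shown in this excerpt): SentEnc, Preprocess, CEModel,
# FeatEnc, ClassificationModel, predict, split_train_test, get_df, and label2ind
# come from the surrounding project; np, torch, json, gzip, os, tqdm,
# functools.reduce, and tabulate are imported at module level.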
def main(spec):
    np.random.seed(spec['seed'])
    torch.manual_seed(spec['seed'])
    
    nthreads = spec['threads']
    os.environ["OMP_NUM_THREADS"] = str(nthreads)
    os.environ["OPENBLAS_NUM_THREADS"] = str(nthreads)
    os.environ["MKL_NUM_THREADS"] = str(nthreads)
    os.environ["VECLIB_MAXIMUM_THREADS"] = str(nthreads)
    os.environ["NUMEXPR_NUM_THREADS"] = str(nthreads)

    input_file = spec['cl']['input_file']
    folds_path = spec['cl']['folds']
    mode = spec['cl']['mode']
    device = spec['device']
    models_path = spec['cl']['model_path']
    ce_dim = spec['ce']['encdim']
    senc_dim = spec['senc_dim']
    window = spec['ce']['window']
    f_dim = spec['fe']['fdim']
    fenc_dim = spec['fe']['enc_dim']
    n_classes = spec['cl']['num_classes']
    infersent_model = spec['infersent_model']
    w2v_path = spec['w2v_path']
    vocab_size = spec['vocab_size']
    half_precision = False
    if device != 'cpu': torch.cuda.set_device(device)

    senc = SentEnc(infersent_model, w2v_path,
                   vocab_size, device=device, hp=half_precision)
    prep = Preprocess()
    with gzip.open(input_file) as infile:
        tables = np.array([json.loads(line) for li, line in enumerate(infile)])
    for i in range(len(tables)): 
        tables[i]['table_array'] = np.array(prep.clean_table_array(tables[i]['table_array']))
    folds = json.load(open(folds_path))
    ## initialize the sentence encodings
    pbar = tqdm(total=len(tables))
    pbar.set_description('initialize sent encodings:')
    sentences = set()
    for t in tables:
        for row in t['table_array']:
            for c in row:
                sentences.add(c)
        pbar.update(1)
    senc.cache_sentences(list(sentences))
    reports = []
    for fi, fold in enumerate(folds):
        train_tables, dev_tables, test_tables = split_train_test(tables, fold, 1)

        ce_model = CEModel(senc_dim, ce_dim//2, window*4)
        ce_model = ce_model.to(device)
        fe_model = FeatEnc(f_dim, fenc_dim)
        fe_model = fe_model.to(device)
        cl_model = ClassificationModel(ce_dim+fenc_dim, n_classes).to(device)
        
        ce_model.load_state_dict(torch.load(models_path+f'/ce_fold{fi}.model', map_location=device))
        fe_model.load_state_dict(torch.load(models_path+f'/fe_fold{fi}.model', map_location=device))
        cl_model.load_state_dict(torch.load(models_path+f'/cl_fold{fi}.model', map_location=device))
        f1macro, report, _, _, _ = predict(test_tables, cl_model, ce_model, fe_model, senc, label2ind, device=device)
        reports.append(report)
        print(f'fold {fi} test f1-macro = {f1macro}')
    dfs = [get_df(r) for r in reports]
    mean_res = reduce(lambda x, y: x.add(y, fill_value=0), dfs)/len(dfs)
    std_res = [(x - mean_res) ** 2 for x in dfs]
    std_res = reduce(lambda x, y: x.add(y, fill_value=0), std_res)
    std_res = (std_res / len(dfs)).pow(1. / 2)
    print('mean:')
    print(tabulate.tabulate(mean_res, headers='keys', tablefmt='psql'))
    print('STD:')
    print(tabulate.tabulate(std_res, headers='keys', tablefmt='psql'))
Example #10
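# The excerpt starts mid-script: X_full, Y_full, ind, and n (the train-split size)
# are defined earlier in the source file, with numpy as np and sklearn's
# confusion_matrix assumed to be imported.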
train_ind = ind[:n]
test_ind = ind[n:]

X = X_full[train_ind]
Xs = X_full[test_ind]
Y = Y_full[train_ind]
Ys = Y_full[test_ind]

# preprocess
X = X - np.mean(X, axis=0)
sd = np.std(X, axis=0)
sd[sd == 0] = 1
X = X / sd
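# note: the test split Xs is not standardized with these training statistics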

# apply the sghmc procedure
model = ClassificationModel(3)
model.fit(X, Y)

#get posterior samples
m, v, ms = model.predict(X)

preds = np.round(m[:, 0], 0)
c = confusion_matrix(preds, Y)
print("Correctly Classified: {}".format((c[0, 0] + c[1, 1]) / c.sum()))

# seems completely overfitted: training accuracy = 1?

ms = ms.reshape((ms.shape[0], ms.shape[1]))
np.savetxt('posteriorsamples_PTEN.txt', ms)
np.savetxt('designmatrix_PTEN.txt', X)
Example #11
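# The excerpt assumes MNIST-style arrays (x_train, y_train, x_test, y_test)
# are loaded earlier in the source file.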
def get_number_datapoints(number1, number2):

    x = x_train[np.isin(y_train, [number1, number2]), :]
    y = y_train[np.isin(y_train, [number1, number2])]

    xs = x_test[np.isin(y_test, [number1, number2]), :]
    ys = y_test[np.isin(y_test, [number1, number2])]
    return (x, y), (xs, ys)


# pick out 1 vs. 6

(x, y), (xs, ys) = get_number_datapoints(1, 6)

model = ClassificationModel()
model.fit(x, y)

m, v, ms = model.predict(xs)

# model is extremely slow - reduce layers and num_inducing
posterior_samples = ms

# save the data and posterior results
ms = ms.reshape((ms.shape[0], ms.shape[1]))
np.savetxt('MNIST_1_6_posterior.txt', ms)
np.savetxt('MNIST_1_6_data.txt', xs)

##############################################################

# pick out 1 vs 7
Example #12
def main(spec):
    input_file = spec['cl']['input_file']
    folds_path = spec['cl']['folds']
    mode = spec['cl']['mode']
    device = spec['device']
    out_path = spec['cl']['model_path']
    ce_path = spec['cl']['ce_model']
    fe_path = spec['cl']['fe_model']
    ce_dim = spec['ce']['encdim']
    senc_dim = spec['senc_dim']
    window = spec['ce']['window']
    f_dim = spec['fe']['fdim']
    fenc_dim = spec['fe']['encdim']
    num_epochs = spec['cl']['epochs']
    lr = spec['cl']['lr']
    train_size = spec['cl']['train_size']
    dev_size = spec['cl']['cv_size']
    n_classes = spec['cl']['num_classes']
    infersent_model = spec['infersent_model']
    w2v_path = spec['w2v_path']
    vocab_size = spec['vocab_size']
    half_precision = False
    if device != 'cpu': torch.cuda.set_device(device)

    senc = SentEnc(infersent_model,
                   w2v_path,
                   vocab_size,
                   device=device,
                   hp=half_precision)
    prep = Preprocess()
    with gzip.open(input_file) as infile:
        tables = np.array([
            json.loads(line) for li, line in enumerate(infile)
            if li < (train_size + dev_size)
        ])
    for i in range(len(tables)):
        tables[i]['table_array'] = np.array(
            prep.clean_table_array(tables[i]['table_array']))
    folds = json.load(open(folds_path))
    ## initialize the sentence encodings
    pbar = tqdm(total=len(tables))
    pbar.set_description('initialize sent encodings:')
    sentences = set()
    for t in tables:
        for row in t['table_array']:
            for c in row:
                sentences.add(c)
        pbar.update(1)
    senc.cache_sentences(list(sentences))

    for fi, fold in enumerate(folds):
        ce_model = CEModel(senc_dim, ce_dim // 2, window * 4)
        ce_model.load_state_dict(torch.load(ce_path, map_location=device))
        ce_model = ce_model.to(device)

        fe_model = FeatEnc(f_dim, fenc_dim)
        fe_model.load_state_dict(torch.load(fe_path, map_location=device))
        fe_model = fe_model.to(device)

        cl_model = ClassificationModel(ce_dim + fenc_dim, n_classes).to(device)
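        # the classifier head trains at the full LR, while the pretrained
        # encoders are fine-tuned at lr/100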
        optimizer_cl = torch.optim.Adam(cl_model.parameters(), lr=lr)
        optimizer_ce = torch.optim.Adam(ce_model.parameters(), lr=lr / 100)
        optimizer_fe = torch.optim.Adam(fe_model.parameters(), lr=lr / 100)

        print(f'fold {fi} started ...')
        best_dev_loss = np.inf
        train_tables, dev_tables, test_tables = split_train_test(
            tables, fold, dev_size)

        class_weights = get_class_weights(train_tables)
        class_weights = torch.from_numpy(class_weights).float().to(device)
        loss_func = nn.NLLLoss(weight=class_weights,
                               reduction='mean').to(device)
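        # NLLLoss expects log-probabilities, so cl_model presumably ends in
        # log_softmax (matching np.exp over the predictions in Example #1)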

        pbar = tqdm(total=num_epochs * (len(train_tables) + len(dev_tables)))
        pbar.set_description('tr:{:.3f}, dev:{:.3f}'.format(
            np.NaN, best_dev_loss))
        for e in range(num_epochs):
            eloss_train = 0
            eloss_dev = 0
            for ti, t in enumerate(train_tables):
                tarr = np.array(t['table_array'])
                feature_array = np.array(t['feature_array'])
                ann_array = t['annotations']
                n, m = tarr.shape

                fevtarr = get_fevectarr(feature_array, n, m, fe_model, device)
                cevtarr = get_cevectarr(tarr,
                                        ce_model,
                                        senc,
                                        device,
                                        ce_model.num_context // 4,
                                        senc_dim=4096)
                labels, targets_i, targets_j = get_annotations(ann_array, n, m)
                labels = torch.LongTensor(labels).to(device)
                fevtarr = torch.from_numpy(fevtarr).float()
                cevtarr = torch.from_numpy(cevtarr).float()
                features = torch.cat([cevtarr, fevtarr], dim=-1).to(device)
                pred = cl_model(features)

                loss = loss_func(pred[(targets_i, targets_j)], labels)
                eloss_train += loss.item()

                # zero gradients on all three models before backward; zeroing only
                # cl_model would let encoder gradients accumulate across steps
                optimizer_cl.zero_grad()
                optimizer_ce.zero_grad()
                optimizer_fe.zero_grad()
                loss.backward()
                optimizer_cl.step()
                optimizer_ce.step()
                optimizer_fe.step()
                pbar.update(1)
                pbar.set_description('tr:{:.3f}, dev:{:.3f}'.format(
                    eloss_train / (ti + 1), best_dev_loss))
            with torch.no_grad():
                for t in dev_tables:
                    tarr = np.array(t['table_array'])
                    feature_array = np.array(t['feature_array'])
                    ann_array = t['annotations']
                    n, m = tarr.shape

                    fevtarr = get_fevectarr(feature_array, n, m, fe_model,
                                            device)
                    cevtarr = get_cevectarr(tarr,
                                            ce_model,
                                            senc,
                                            device,
                                            ce_model.num_context // 4,
                                            senc_dim=4096)
                    labels, targets_i, targets_j = get_annotations(
                        ann_array, n, m)
                    labels = torch.LongTensor(labels).to(device)
                    fevtarr = torch.from_numpy(fevtarr).float()
                    cevtarr = torch.from_numpy(cevtarr).float()
                    features = torch.cat([cevtarr, fevtarr], dim=-1).to(device)
                    pred = cl_model(features)

                    loss = loss_func(pred[(targets_i, targets_j)], labels)
                    eloss_dev += loss.item()

                    pbar.update(1)
            eloss_train = eloss_train / len(train_tables)
            eloss_dev = eloss_dev / len(dev_tables)
            if eloss_dev < best_dev_loss:
                best_dev_loss = eloss_dev
                torch.save(cl_model.state_dict(),
                           out_path + f'/cl_fold{fi}.model')
                torch.save(fe_model.state_dict(),
                           out_path + f'/fe_fold{fi}.model')
                torch.save(ce_model.state_dict(),
                           out_path + f'/ce_fold{fi}.model')
            pbar.set_description('tr:{:.3f}, dev:{:.3f}'.format(
                eloss_train, best_dev_loss))