Example #1
def eval_acc(inference, val_loader, ctx, return_meta=False):
    mtc_acc = Accuracy()
    mtc_acc.reset()

    feature_nest, y_nest, y_hat_nest = [], [], []
    for X, y in val_loader:
        X = X.as_in_context(ctx[0])
        y = y.as_in_context(ctx[0])
        with autograd.record(train_mode=False):
            y_hat, features = inference(X)

        # update metric
        mtc_acc.update([y], [y_hat])

        if return_meta:
            y_nest.extend(y.asnumpy())
            feature_nest.extend(features.asnumpy())
            y_hat_nest.extend(y_hat.asnumpy())

    feature_nest = np.array(feature_nest)
    y_nest = np.array(y_nest)
    y_hat_nest = np.array(y_hat_nest)

    if return_meta:
        return mtc_acc.get()[1], y_nest, y_hat_nest, feature_nest

    return mtc_acc.get()[1]
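All of the snippets on this page follow the same pattern: construct the metric, feed it batches with update(), and read the running score with get(). A minimal, self-contained sketch (the arrays are made up for illustration; Accuracy takes the argmax of the predictions when they have one more dimension than the labels):

from mxnet import nd
from mxnet.metric import Accuracy

metric = Accuracy()
metric.reset()

labels = nd.array([0, 1, 1])            # ground-truth class indices
logits = nd.array([[0.9, 0.1],          # raw scores, one row per sample
                   [0.2, 0.8],
                   [0.7, 0.3]])

metric.update(labels=[labels], preds=[logits])
print(metric.get())                     # ('accuracy', 0.666...)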
Example #2
    def eval(self, inference, val_loader, log=True, target=True, epoch=True):
        """
        Evaluate the model
        :param inference: network
        :param val_loader: data loader
        :param log: log flag
        :param target: target flag for updating the record and log
        :param epoch: epoch flag for updating the record and log
        :return:
        """
        mtc_acc = Accuracy()
        mtc_acc.reset()
        # val_loader.reset()

        feature_nest, y_nest, y_hat_nest = [], [], []
        for X, Y in val_loader:
            X_lst = split_and_load(X, self.args.ctx, even_split=False)
            Y_lst = split_and_load(Y, self.args.ctx, even_split=False)

            for x, y in zip(X_lst, Y_lst):
                y_hat, features = inference(x)
                # update metric
                mtc_acc.update([y], [y_hat])

                y_nest.extend(y.asnumpy())
                feature_nest.extend(features.asnumpy())
                y_hat_nest.extend(y_hat.asnumpy())

        feature_nest = np.array(feature_nest)
        y_nest = np.array(y_nest).astype(int)
        y_hat_nest = np.array(y_hat_nest)

        if log:
            target_key = 'Tgt' if target else 'Src'
            epoch_key = 'Epoch' if epoch else 'Iter'
            record = self.cur_epoch if epoch else self.cur_iter

            if mtc_acc.get()[1] > self.records[epoch_key]['%s-Acc' % target_key]:
                if target:
                    self.records[epoch_key][epoch_key] = record
                self.records[epoch_key]['%s-Acc' % target_key] = mtc_acc.get()[1]
                self.records[epoch_key]['%s-label' % target_key] = y_nest
                self.records[epoch_key]['%s-preds' % target_key] = y_hat_nest
                self.records[epoch_key]['%s-features' % target_key] = feature_nest

                self.save_params(inference, 0, epoch_key)

            self.logger.update_scalar(
                '%s [%d]: Eval-Acc-%s' % (epoch_key, record, target_key),
                mtc_acc.get()[1])
            if self.sw:
                self.sw.add_scalar('Acc/Eval-%s-Acc-%s' % (epoch_key, target_key),
                                   mtc_acc.get()[1],
                                   global_step=record)

        return mtc_acc.get()[1], y_nest, y_hat_nest, feature_nest
Example #3
    def train_block(self, data_iter: DataLoader, docs: Sequence[Document]) -> float:
        acc = Accuracy()
        for dids, sids, data, label in tqdm(data_iter, leave=False):
            # batch_size, sequence_length, input_size -> sequence_length, batch_size, input_size
            X = nd.transpose(data, axes=(1, 0, 2)).as_in_context(self.ctx)
            # batch_size, sequence_length -> sequence_length, batch_size
            Y = label.T.as_in_context(self.ctx)
            state = self.model.begin_state(batch_size=X.shape[1], ctx=self.ctx)
            for s in state:
                s.detach()
            with autograd.record():
                output, state = self.model(X, state)
                l = self.loss(output, Y)
            l.backward()
            grads = [param.grad(self.ctx) for param in self.model.collect_params().values()]
            clip_global_norm(grads, self.model.rnn_layer.clip * X.shape[0] * X.shape[1])

            # sequence_length, batch_size -> batch_size, sequence_length
            for batch, (preds, labels) in enumerate(zip(nd.argmax(output, axis=2).T, label)):
                sen = docs[dids[batch].asscalar()].sentences[sids[batch].asscalar()]
                sequence_length = len(sen)
                preds = preds[:sequence_length]
                labels = labels[:sequence_length]
                acc.update(labels=labels, preds=preds)
            self.trainer.step(data.shape[0])
        return float(acc.get()[1])
Example #4
    def evaluate_block(self, data_iter: DataLoader,
                       docs: Sequence[Document]) -> float:
        """

        :param data_iter:
        :param docs:
        :return:
        """
        self.decode_block(data_iter=data_iter, docs=docs)
        if self.chunking:
            acc = ChunkF1()
            for doc in docs:
                for sen in doc.sentences:
                    acc.update(labels=sen[to_gold(self.key)],
                               preds=sen[self.key])
        else:
            acc = Accuracy()
            for doc in docs:
                for sen in doc.sentences:
                    labels = nd.array([
                        self.label_map.cid(label)
                        for label in sen[to_gold(self.key)]
                    ])
                    preds = nd.array(
                        [self.label_map.cid(pred) for pred in sen[self.key]])
                    acc.update(labels=labels, preds=preds)
        return acc.get()[1]
Example #5
    def eval_epoch(self):
        self.is_train = False
        meter = Accuracy()
        meter.reset()

        for X, y in self.test_loader:
            X = X.as_in_context(self.ctx[0])
            y = y.as_in_context(self.ctx[0])

            y_hat, features = self.net(X)
            meter.update([y], [y_hat])

        acc = meter.get()[1]
        logging.info('Test  - Epoch {}, Iter {}, Acc {:.2f} %'.format(
            self.cur_epoch, self.cur_iter, acc * 100))

        if acc > self.eval_tracker['Acc']:
            self.eval_tracker.update({
                'Epoch': self.cur_epoch,
                'Iter': self.cur_iter,
                'Acc': acc
            })

        self.net.save_parameters('{}_{}_{}_{:.2f}.params'.format(
            self.cfg.META.CKPT_PATH, self.cur_epoch, self.cur_iter, acc))
Example #6
def run_training(net, trainer, train_dataloader, val_dataloader, epochs,
                 model_path, context):
    loss_fn = mx.gluon.loss.SoftmaxCrossEntropyLoss()

    for e in range(epochs):
        train_acc = Accuracy()
        val_acc = Accuracy()
        train_loss = 0.
        total_items = 0

        for i, (data, label) in enumerate(train_dataloader):
            items_per_iteration = data.shape[0]
            total_items += items_per_iteration

            data = data.as_in_context(context)
            label = label.as_in_context(context)

            with autograd.record():
                output = net(data)
                output = output.reshape((-1, 3))
                label = label.reshape((-1, 1))
                loss = loss_fn(output, label)

            loss.backward()
            trainer.step(items_per_iteration)

            train_loss += loss.mean().asscalar()
            train_acc.update(label.flatten(), output.argmax(axis=1).flatten())

        for i, (data, label) in enumerate(val_dataloader):
            data = data.as_in_context(context)
            label = label.as_in_context(context)

            output = net(data)
            output = output.reshape((-1, 3))
            val_acc.update(
                label.reshape(-1, 1).flatten(),
                output.argmax(axis=1).flatten())

        print(
            "Epoch {}. Current Loss: {:.5f}. Train accuracy: {:.3f}, Validation accuracy: {:.3f}."
            .format(e, train_loss / total_items,
                    train_acc.get()[1],
                    val_acc.get()[1]))

    net.save_parameters(model_path)
    return model_path
Example #7
def validate(net, val_loader, gpu_id, train_index2words, val_index2words):
    metric = BleuMetric(pred_index2words=train_index2words,
                        label_index2words=val_index2words)
    metric_acc = Accuracy()
    metric_acc.reset()
    metric.reset()
    for batch in tqdm.tqdm(val_loader):
        batch = [x.as_in_context(mx.gpu(gpu_id)) for x in batch]
        image, label, label_len = batch
        predictions, alphas = net(image, None, None)
        for n, l in enumerate(label_len):
            l = int(l.asscalar())
            la = label[n, 1:l]
            pred = predictions[n, :]
            metric.update(la, pred)
            metric_acc.update(la, predictions[n, :(l - 1)])
    return metric.get()[1], metric_acc.get()[1]
Example #8
    def train_block(self, data_iter: DataLoader,
                    docs: Sequence[Document]) -> float:
        """

        :param data_iter:
        :param sens:
        :return:
        """
        acc = Accuracy()
        for data, label in tqdm(data_iter, leave=False):
            data = data.as_in_context(self.ctx)
            label = label.as_in_context(self.ctx)
            with autograd.record():
                output = self.model(data)
                l = self.loss(output, label)
            l.backward()
            for preds, labels in zip(nd.argmax(output, axis=1), label):
                acc.update(labels=labels, preds=preds)
            self.trainer.step(data.shape[0])
        return float(acc.get()[1])
Example #9
def validate(net, val_loader, gpu_id, train_index2words, val_index2words):
    metric = BleuMetric(pred_index2words=train_index2words,
                        label_index2words=val_index2words)
    metric_acc = Accuracy()
    metric_acc.reset()
    metric.reset()
    for batch in tqdm.tqdm(val_loader):
        batch = [Variable(torch.from_numpy(x.asnumpy()).cuda()) for x in batch]
        image, label, label_len = batch
        label = label.long()
        label_len = label_len.long()
        predictions, alphas = net(image, None, None)
        for n, l in enumerate(label_len):
            l = int(l.data.cpu().numpy().squeeze().tolist())
            la = label[n, 1:l].data.cpu().numpy()
            pred = predictions[n, :].data.cpu().numpy()
            metric.update(la, pred)
            metric_acc.update(
                mx.nd.array(la),
                mx.nd.array(predictions[n, :(l - 1)].data.cpu().numpy()))
    return metric.get()[1], metric_acc.get()[1]
Example #10
    def evaluation(self, x, y_true):
        """
        输入一组数据和标签返回正确率和交叉熵(y与y_true)
        :param x: data
        :param y_true: label(one-hot-like)
        :return: (accuracy,crossentropy)
        """
        # Convert the one-hot labels back to class indices
        nor_label = nd.argmax(y_true, axis=1, keepdims=False)  #type:nd.NDArray
        # predict() does not necessarily return softmax outputs; normalize each row
        # so that it sums to 1, otherwise the cross entropy can produce NaN
        raw_pred = self.predict(x)  #type:nd.NDArray
        y_pred = raw_pred / raw_pred.sum(axis=1,
                                         keepdims=True)  #type:nd.NDArray
        y_pred_sparse = y_pred.argmax(axis=1, keepdims=False)
        # Compute the metrics
        acc = Accuracy()
        acc.update(labels=[nor_label], preds=[y_pred])
        acc_val = acc.get()[1]
        # Cross entropy
        cro = CrossEntropy()
        cro.update(labels=[nor_label], preds=[y_pred])
        cro_val = cro.get()[1]

        # Choose the averaging mode: more than two prediction entries means multi-class, so use "macro"; otherwise use "binary"
        average = "macro" if len(raw_pred[0]) > 2 else "binary"
        # Recall
        recall = recall_score(nor_label.asnumpy(),
                              y_pred_sparse.asnumpy(),
                              average=average,
                              pos_label=self.pos_label)
        # Precision
        precision = precision_score(nor_label.asnumpy(),
                                    y_pred_sparse.asnumpy(),
                                    average=average,
                                    pos_label=self.pos_label)
        # Return the metrics
        return acc_val, cro_val, recall, precision
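The normalization above matters because CrossEntropy expects probabilities: taking the log of raw, unnormalized scores can produce NaN. A minimal sketch of the same trick on made-up values (dividing by the row sum only stands in for softmax when the raw scores are non-negative, as the comment above assumes):

from mxnet import nd
from mxnet.metric import Accuracy, CrossEntropy

raw_pred = nd.array([[2.0, 6.0], [9.0, 1.0]])            # unnormalized, non-negative scores
y_pred = raw_pred / raw_pred.sum(axis=1, keepdims=True)  # each row now sums to 1
labels = nd.array([1, 0])                                 # class indices, not one-hot

acc, cro = Accuracy(), CrossEntropy()
acc.update(labels=[labels], preds=[y_pred])
cro.update(labels=[labels], preds=[y_pred])
print(acc.get(), cro.get())                               # accuracy 1.0, cross entropy ~0.20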
Example #11
        losses = []
        with ag.record():
            for x, y in zip(data, label):
                z = model(x)
                # computes softmax cross entropy loss
                l = loss_fn(z, y)
                output.append(z)
                losses.append(l)
        # backpropagate the error for one iteration.
        for l in losses:
            l.backward()
        # Update network weights
        trainer.step(BATCH_SIZE)
        # Update metric
        metric.update(label, output)
    str1 = 'Epoch [{}], Accuracy {:.4f}'.format(epoch, metric.get()[1])
    str2 = '~Samples/Sec {:.4f}'.format(BATCH_SIZE * (i + 1) /
                                        (time.time() - tick_0))
    print('%s  %s' % (str1, str2))
    metric.reset()

elapsed = time.perf_counter() - start
print('elapsed: {:0.3f}'.format(elapsed))

# use Accuracy as the evaluation metric
metric = Accuracy()
for data, label in test_data:
    data = split_and_load(data, ctx_list=ctx, batch_axis=0)
    label = split_and_load(label, ctx_list=ctx, batch_axis=0)
    outputs = []
    for x in data:
Example #12
def main(train_list,
         val_list,
         model,
         exp,
         saved_model,
         batch_size,
         optimizer,
         nb_epochs,
         augment,
         max_lr,
         min_lr,
         loss_function,
         train_all,
         nb_frames,
         eager,
         params=None,
         **kwargs):

    print("Unused arguments:", kwargs)

    setname = train_list.split(os.sep)[0]
    # Timestamp to name experiment folder
    xptime = strftime("%Y-%m-%d_%Hh%Mm%Ss", gmtime())
    xp_folder = "experiments/%s-%s-%s_%s" % (setname, model, exp, xptime)
    # Make folder
    mkdir_p(xp_folder)
    mkdir_p(os.path.join(xp_folder, 'checkpoints'))
    mkdir_p(os.path.join(xp_folder, 'tb'))
    print("\nSaving experiment data to:", xp_folder)

    # Save command (as well as possible)
    with open(os.path.join(xp_folder, 'command.sh'), "w") as f:
        command = " ".join(sys.argv[:]) + "\n"
        f.write(command)

    # Save employed parameters for future reference
    if params is not None:
        write_params(os.path.join(xp_folder, 'params.json'), params)

    #############
    # Callbacks #
    #############

    # Helper: Save the model.
    ckpt_fmt = os.path.join(
        xp_folder, 'checkpoints', model + '-' + exp +
        '.{epoch:03d}-loss{val_loss:.3f}-acc{val_acc:.3f}.hdf5')
    checkpointer = ModelCheckpoint(filepath=ckpt_fmt,
                                   verbose=1,
                                   save_best_only=True,
                                   monitor='val_acc')

    # Helper: TensorBoard
    tb = HistoryKeeper(logdir=os.path.join(xp_folder),
                       keys=['val_acc', 'val_loss', 'train_time', 'val_time'])

    # Helper: Stop when we stop learning.
    # early_stopper = EarlyStopper(patience=15)

    # Helper: Terminate when finding a NaN loss
    nan_term = TerminateOnNaN()

    callbacks = [tb, checkpointer, nan_term]
    #############

    #############
    #  Loading  #
    #############
    if augment:
        augmenter = default_augmenter_vid(strip_size=4)
    else:
        augment = False
        augmenter = None

    # Dataset classes
    train_data = ArrayData(train_list,
                           nb_frames=nb_frames,
                           augmenter=augmenter,
                           eager=eager)
    val_data = ArrayData(val_list,
                         nb_frames=nb_frames,
                         augmenter=None,
                         eager=eager,
                         encoder=train_data.get_encoder())

    # Saving encoder
    with open(os.path.join(xp_folder, 'encoder.pkl'), 'wb') as f:
        pickle.dump(train_data.get_encoder(), f)

    # Train loader
    train_loader = DataLoader(train_data,
                              batch_size=batch_size,
                              shuffle=True,
                              last_batch='keep',
                              num_workers=10)
    nb_samples = len(train_data)  # loader should provide the number of samples

    # Validation loader
    val_loader = DataLoader(val_data,
                            batch_size=batch_size,
                            shuffle=False,
                            last_batch='keep',
                            num_workers=10)
    nb_validation = len(val_data)  # loader should provide the number of samples

    # Compute number of steps
    steps_per_epoch = math.ceil(nb_samples / batch_size)
    validation_steps = math.ceil(nb_validation / batch_size)

    # The model
    net = ResearchModels(train_data.nb_classes,
                         model,
                         saved_model,
                         input_shape=train_data.shape,
                         train_all=train_all).model

    # A little more verbosity
    print("************************************")
    if train_all:
        print("Train all layers.")
    print("Max lr:", max_lr, " Min lr:", min_lr)
    print("Batch size:", batch_size)
    print(nb_samples, "training samples,", steps_per_epoch, "steps per epoch")
    print(nb_validation, "validation samples,", validation_steps,
          "validation steps")
    print("Optimizer:", optimizer)
    if augment:
        print("Using data augmentation")
    else:
        print("WARNING: Not using data augmentation")
    print("************************************")

    ############################
    #   Loss and Optimization  #
    ############################

    trainer = gluon.Trainer(net.collect_params(), optimizer,
                            {'learning_rate': max_lr})

    if loss_function == 'categorical_crossentropy':
        loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
        loss_fn.hybridize()

    ############
    # Training #
    ############
    progress_desc = "Super epoch %03d - acc %.3f - loss %.3f  "
    acc = Accuracy()
    start_time = time()

    super_epoch_size = 250
    # Learning rate decay
    iteration = 1
    decay_alpha = 0.01**0.25
    lr = max_lr

    for epoch in range(1, nb_epochs + 1):
        train_loss, val_loss = 0., 0.
        nb_batches = 0
        tic = time()
        acc.reset()

        start_training = time()
        t = tqdm(range(super_epoch_size), unit='epochs')
        for _ in t:
            for data, label in train_loader:
                # Learning rate decay
                if iteration % 10000 == 0:
                    lr *= decay_alpha
                    trainer.set_learning_rate(lr)
                    print("Learning rate updated to", lr)
                iteration += 1

                current_batch_size = data.shape[0]
                data = data.copyto(mx.gpu(0))
                label = label.copyto(mx.gpu(0))

                with autograd.record():
                    output = net(data)
                    loss = loss_fn(output, label)
                loss.backward()
                # print(mx.nd.log_softmax(output[0], axis=-1), label[0])

                # update parameters
                trainer.step(current_batch_size)

                # calculate training metrics
                train_loss += loss.mean().asscalar()
                # accuracy(output, label)
                acc.update(preds=output, labels=label)

                nb_batches += 1

            t.set_description(progress_desc %
                              (epoch, acc.get()[1], train_loss / nb_batches))

        train_time = time() - start_training

        train_loss /= steps_per_epoch * super_epoch_size
        train_acc = acc.get()[1]

        acc.reset()
        start_val = time()
        # calculate validation accuracy
        tval = tqdm(val_loader,
                    leave=False,
                    desc='Running validation',
                    unit='batch')
        for data, label in tval:
            data = data.copyto(mx.gpu(0))
            label = label.copyto(mx.gpu(0))

            # Compute outputs
            output = net(data)
            loss = loss_fn(output, label)

            # Compute metrics
            val_loss += loss.mean().asscalar()
            # val_acc += accuracy(output, label)
            acc.update(preds=output, labels=label)

        val_time = time() - start_val

        val_loss /= validation_steps
        val_acc = acc.get()[1]

        print(
            "Epoch %d: loss %.3f, acc %.3f, val_loss %.3f, val_acc %.3f, in %.1f sec"
            % (epoch, train_loss, train_acc, val_loss, val_acc, time() - tic))
        print(
            "--------------------------------------------------------------------------------"
        )

        stop = False
        train_info = {
            'epoch': epoch,
            'loss': train_loss,
            'acc': train_acc,
            'val_loss': val_loss,
            'val_acc': val_acc,
            'train_time': train_time,
            'val_time': val_time
        }
        for cb in callbacks:
            if cb(net, train_info):
                stop = True

        if stop:
            break
        print()

    hours, rem = divmod(time() - start_time, 3600)
    days, hours = divmod(hours, 24)
    minutes, seconds = divmod(rem, 60)

    print("%d training epochs in %dd, %dh%dm%.2fs." %
          (nb_epochs, int(days), int(hours), int(minutes), seconds))
Example #13
def run_training(net, trainer, train_dataloader, val_dataloader, intents_count,
                 epochs, model_path, context):
    intent_loss_fn = mx.gluon.loss.SoftmaxCrossEntropyLoss()
    max_val_accuracy = 0
    best_model_path = ''

    for e in range(epochs):
        intent_train_acc = Accuracy()
        slot_train_acc = Accuracy()

        intent_val_acc = Accuracy()
        slot_val_acc = Accuracy()

        train_loss = 0.
        total_items = 0

        for i, (data, valid_lengths, entities,
                intent) in enumerate(train_dataloader):
            length = data.shape[1]
            items_per_iteration = data.shape[0]
            total_items += items_per_iteration

            data = data.as_in_context(context)
            intent = intent.as_in_context(context)
            entities = entities.as_in_context(context)

            hidden_state = net.elmo_container[0].begin_state(
                mx.nd.zeros, batch_size=items_per_iteration, ctx=context)
            mask = get_data_mask(length, valid_lengths, items_per_iteration,
                                 context)

            with autograd.record():
                intents, slots = net(data, hidden_state, mask)
                intents = intents.reshape((-1, intents_count))
                intent = intent.reshape((-1, 1))
                loss_intent = intent_loss_fn(intents, intent)

                # crf accepts seq_len x bs x channels
                score, slots_seq = net.crf(slots.transpose(axes=(1, 0, 2)))
                neg_log_likelihood = net.crf.neg_log_likelihood(
                    slots.transpose(axes=(1, 0, 2)), entities)
                loss = 0.1 * loss_intent.mean() + 0.9 * neg_log_likelihood.mean()

            loss.backward()
            trainer.step(1)

            train_loss += loss.mean().asscalar()
            intent_train_acc.update(intent.flatten(),
                                    intents.argmax(axis=1).flatten())
            slot_train_acc.update(entities, slots_seq)

        for i, (data, valid_lengths, entities,
                intent) in enumerate(val_dataloader):
            items_per_iteration = data.shape[0]
            length = data.shape[1]

            data = data.as_in_context(context)
            intent = intent.as_in_context(context)
            entities = entities.as_in_context(context)

            hidden_state = net.elmo_container[0].begin_state(
                mx.nd.zeros, batch_size=items_per_iteration, ctx=context)
            mask = get_data_mask(length, valid_lengths, items_per_iteration,
                                 context)

            intents, slots = net(data, hidden_state, mask)
            intents = intents.reshape((-1, intents_count))
            intent = intent.reshape((-1, 1))

            score, slots_seq = net.crf(slots.transpose(axes=(1, 0, 2)))

            intent_val_acc.update(intent.flatten(),
                                  intents.argmax(axis=1).flatten())
            slot_val_acc.update(entities, slots_seq)

        print(
            "Epoch {}. Current Loss: {:.5f}. \n"
            "Intent train accuracy: {:.3f}, Slots train accuracy: {:.3f}, \n"
            "Intent valid accuracy: {:.3f}, Slot val accuracy: {:.3f}".format(
                e, train_loss / total_items,
                intent_train_acc.get()[1],
                slot_train_acc.get()[1],
                intent_val_acc.get()[1],
                slot_val_acc.get()[1]))

        if max_val_accuracy < slot_val_acc.get()[1]:
            max_val_accuracy = slot_val_acc.get()[1]
            best_model_path = model_path + '_{:04d}.params'.format(e)
            net.save_parameters(best_model_path)
            print("Improvement observed")
        else:
            print("No improvement")

    return best_model_path
Example #14

def load_net(param_file="net.params", ctx=cpu(0)):
    net = SimpleNet()
    net.load_parameters(param_file, ctx=ctx)
    return net


def get_val_data(transformer, batch_size=128):
    mnist_valid = gluon.data.vision.FashionMNIST(train=False)
    valid_data = gluon.data.DataLoader(
        mnist_valid.transform_first(transformer),
        batch_size=batch_size,
        num_workers=4)
    return valid_data


if __name__ == "__main__":
    ctx = gpu(0) if context.num_gpus() else cpu(0)
    net = load_net("net.params", ctx=ctx)
    valid_data = get_val_data(transformer)

    val_acc = Accuracy()
    for data, label in valid_data:
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.predict_mode():
            out = net(data)
            val_acc.update(label, out)
    print("Accuray: ", val_acc.get()[1])