Example #1
def train_net(args, epoch_size, data_path, eval_per_epoch, repeat_size,
              ckpoint_cb, sink_mode):
    """define the training method"""
    print("============== Starting Training ==============")
    # Create training dataset
    ds_train = create_dataset(args, True, data_path, 32, repeat_size)
    # Initialise model
    model = Model(resnet, net_loss, net_opt, metrics={"Accuracy": Accuracy()})
    # model = Model(resnet, net_loss, net_opt, metrics={"Accuracy": Accuracy()}, amp_level="O3") # this will not work for CPU
    epoch_per_eval = {"epoch": [], "acc": []}
    eval_cb = Evalcb(model, ds_train, eval_per_epoch, epoch_per_eval)
    model.train(epoch_size,
                ds_train,
                callbacks=[ckpoint_cb, LossMonitor(), eval_cb],
                dataset_sink_mode=sink_mode)
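The Evalcb callback used above is not defined in this snippet; a minimal sketch, assuming the standard MindSpore Callback pattern of running model.eval every eval_per_epoch epochs and recording the accuracy:

from mindspore.train.callback import Callback

class Evalcb(Callback):
    """Assumed behaviour: evaluate every `eval_per_epoch` epochs and record accuracy."""
    def __init__(self, model, eval_dataset, eval_per_epoch, epoch_per_eval):
        super(Evalcb, self).__init__()
        self.model = model
        self.eval_dataset = eval_dataset
        self.eval_per_epoch = eval_per_epoch
        self.epoch_per_eval = epoch_per_eval

    def epoch_end(self, run_context):
        cb_params = run_context.original_args()
        cur_epoch = cb_params.cur_epoch_num
        if cur_epoch % self.eval_per_epoch == 0:
            acc = self.model.eval(self.eval_dataset, dataset_sink_mode=False)
            self.epoch_per_eval["epoch"].append(cur_epoch)
            self.epoch_per_eval["acc"].append(acc["Accuracy"])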
Example #2
def me_train_tensor(net, input_np, label_np, epoch_size=2):
    """me_train_tensor"""
    loss = SoftmaxCrossEntropyWithLogits(is_grad=False,
                                         sparse=True,
                                         reduction="mean")
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   lr_gen(lambda i: 0.1, epoch_size), 0.9, 0.01, 1024)
    Model(net, loss, opt)
    _network = nn.WithLossCell(net, loss)
    _train_net = nn.TrainOneStepCell(_network, opt)
    _train_net.set_train()
    label_np = np.argmax(label_np, axis=-1).astype(np.int32)
    for epoch in range(0, epoch_size):
        print(f"epoch %d" % (epoch))
        _train_net(Tensor(input_np), Tensor(label_np))
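A hypothetical call to me_train_tensor; the network, shapes and class count below are assumptions, not part of the original example:

net = LeNet5()                                                # assumed 10-class network
input_np = np.random.randn(32, 1, 32, 32).astype(np.float32)  # assumed NCHW input shape
label_np = np.eye(10, dtype=np.float32)[np.random.randint(0, 10, 32)]  # one-hot labels, argmax'd inside
me_train_tensor(net, input_np, label_np, epoch_size=2)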
Example #3
def test_eval():
    """eval method"""
    if not os.path.exists(args.output_path):
        os.makedirs(args.output_path)

    layers = cfg.layers
    num_factors = cfg.num_factors
    topk = rconst.TOP_K
    num_eval_neg = rconst.NUM_EVAL_NEGATIVES

    ds_eval, num_eval_users, num_eval_items = create_dataset(
        test_train=False,
        data_dir=args.data_path,
        dataset=args.dataset,
        train_epochs=0,
        eval_batch_size=cfg.eval_batch_size)
    print("ds_eval.size: {}".format(ds_eval.get_dataset_size()))

    ncf_net = NCFModel(num_users=num_eval_users,
                       num_items=num_eval_items,
                       num_factors=num_factors,
                       model_layers=layers,
                       mf_regularization=0,
                       mlp_reg_layers=[0.0, 0.0, 0.0, 0.0],
                       mf_dim=16)
    param_dict = load_checkpoint(args.checkpoint_file_path)
    load_param_into_net(ncf_net, param_dict)

    loss_net = NetWithLossClass(ncf_net)
    train_net = TrainStepWrap(loss_net)
    # train_net.set_train()
    eval_net = PredictWithSigmoid(ncf_net, topk, num_eval_neg)

    ncf_metric = NCFMetric()
    model = Model(train_net,
                  eval_network=eval_net,
                  metrics={"ncf": ncf_metric})

    ncf_metric.clear()
    out = model.eval(ds_eval)

    eval_file_path = os.path.join(args.output_path, args.eval_file_name)
    with open(eval_file_path, "a+") as eval_file:
        eval_file.write("EvalCallBack: HR = {}, NDCG = {}\n".format(
            out['ncf'][0], out['ncf'][1]))
    print("EvalCallBack: HR = {}, NDCG = {}".format(out['ncf'][0],
                                                    out['ncf'][1]))
Example #4
def test_lenet_mnist_coverage():
    # load the trained network
    ckpt_path = '../common/networks/lenet5/trained_ckpt_file/checkpoint_lenet-10_1875.ckpt'
    net = LeNet5()
    load_dict = load_checkpoint(ckpt_path)
    load_param_into_net(net, load_dict)
    model = Model(net)

    # get training data
    data_list = "../common/dataset/MNIST/train"
    batch_size = 32
    ds = generate_mnist_dataset(data_list, batch_size, sparse=True)
    train_images = []
    for data in ds.create_tuple_iterator(output_numpy=True):
        images = data[0].astype(np.float32)
        train_images.append(images)
    train_images = np.concatenate(train_images, axis=0)

    # initialize fuzz test with training dataset
    model_fuzz_test = ModelCoverageMetrics(model, 10, 1000, train_images)

    # fuzz test with original test data
    # get test data
    data_list = "../common/dataset/MNIST/test"
    batch_size = 32
    ds = generate_mnist_dataset(data_list, batch_size, sparse=True)
    test_images = []
    test_labels = []
    for data in ds.create_tuple_iterator(output_numpy=True):
        images = data[0].astype(np.float32)
        labels = data[1]
        test_images.append(images)
        test_labels.append(labels)
    test_images = np.concatenate(test_images, axis=0)
    test_labels = np.concatenate(test_labels, axis=0)
    model_fuzz_test.calculate_coverage(test_images)
    LOGGER.info(TAG, 'KMNC of this test is : %s', model_fuzz_test.get_kmnc())
    LOGGER.info(TAG, 'NBC of this test is : %s', model_fuzz_test.get_nbc())
    LOGGER.info(TAG, 'SNAC of this test is : %s', model_fuzz_test.get_snac())

    # generate adv_data
    loss = SoftmaxCrossEntropyWithLogits(sparse=True)
    attack = FastGradientSignMethod(net, eps=0.3, loss_fn=loss)
    adv_data = attack.batch_generate(test_images, test_labels, batch_size=32)
    model_fuzz_test.calculate_coverage(adv_data, bias_coefficient=0.5)
    LOGGER.info(TAG, 'KMNC of this adv data is : %s', model_fuzz_test.get_kmnc())
    LOGGER.info(TAG, 'NBC of this adv data is : %s', model_fuzz_test.get_nbc())
    LOGGER.info(TAG, 'SNAC of this adv data is : %s', model_fuzz_test.get_snac())
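A possible extension of the test above: sweep the FGSM perturbation size and re-measure coverage with the same metric object (a sketch that reuses only objects already constructed above):

for eps in (0.1, 0.2, 0.3):
    attack = FastGradientSignMethod(net, eps=eps, loss_fn=loss)
    adv_data = attack.batch_generate(test_images, test_labels, batch_size=32)
    model_fuzz_test.calculate_coverage(adv_data, bias_coefficient=0.5)
    LOGGER.info(TAG, 'eps=%s, KMNC: %s, NBC: %s, SNAC: %s', eps,
                model_fuzz_test.get_kmnc(), model_fuzz_test.get_nbc(),
                model_fuzz_test.get_snac())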
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--device_id',
                        type=int,
                        default=1,
                        help='which device the model will be evaluated on')
    args, model_settings = eval_config(parser)
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Davinci",
                        device_id=args.device_id)

    # Logger
    args.outputs_dir = os.path.join(
        args.log_path,
        datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
    args.logger = get_logger(args.outputs_dir)
    # show args
    args.logger.save_args(args)
    # find model path
    if os.path.isdir(args.model_dir):
        models = list(glob.glob(os.path.join(args.model_dir, '*.ckpt')))
        print(models)

        def sort_key(path):
            # sort checkpoints by the epoch number embedded in the file name, newest first
            name = os.path.splitext(os.path.split(path)[-1])[0]
            return -1 * int(name.split('-')[0].split('epoch')[-1])

        args.models = sorted(models, key=sort_key)
    else:
        args.models = [args.model_dir]

    args.best_acc = 0
    args.index = 0
    args.best_index = 0
    for model_path in args.models:
        test_de = audio_dataset(args.feat_dir, 'testing',
                                model_settings['spectrogram_length'],
                                model_settings['dct_coefficient_count'],
                                args.per_batch_size)
        network = DSCNN(model_settings, args.model_size_info)

        load_ckpt(network, model_path, False)
        network.set_train(False)
        model = Model(network)
        args.logger.info('load model {} success'.format(model_path))
        val(args, model, test_de)
        args.index += 1

    args.logger.info('Best model:{} acc:{:.2f}%'.format(
        args.models[args.best_index], args.best_acc))
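The val helper is not shown; a minimal sketch of the behaviour the loop above assumes (score each checkpoint and remember the best one):

def val(args, model, test_de):
    # Assumed behaviour: top-1 accuracy over the test set, best checkpoint index recorded.
    correct, total = 0, 0
    for data, label in test_de.create_tuple_iterator(output_numpy=True):
        logits = model.predict(Tensor(data)).asnumpy()
        correct += (logits.argmax(axis=-1) == label).sum()
        total += label.shape[0]
    acc = 100.0 * correct / total
    if acc > args.best_acc:
        args.best_acc, args.best_index = acc, args.index
    args.logger.info('model index: {}, accuracy: {:.2f}%'.format(args.index, acc))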
Example #6
def test_net(data_dir,
             ckpt_path,
             cross_valid_ind=1,
             cfg=None):

    net = UNet(n_channels=cfg['num_channels'], n_classes=cfg['num_classes'])
    param_dict = load_checkpoint(ckpt_path)
    load_param_into_net(net, param_dict)

    criterion = CrossEntropyWithLogits()
    _, valid_dataset = create_dataset(data_dir, 1, 1, False, cross_valid_ind, False)
    model = Model(net, loss_fn=criterion, metrics={"dice_coeff": dice_coeff()})

    print("============== Starting Evaluating ============")
    dice_score = model.eval(valid_dataset, dataset_sink_mode=False)
    print("============== Cross valid dice coeff is:", dice_score)
Example #7
def me_train_tensor(net, input_np, label_np, epoch_size=2):
    context.set_context(mode=context.GRAPH_MODE)
    loss = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
    opt = ApplyMomentum(Tensor(np.array([0.1])), Tensor(np.array([0.9])),
                        filter(lambda x: x.requires_grad, net.get_parameters()))
    Model(net, loss, opt)
    _network = wrap.WithLossCell(net, loss)
    _train_net = MsWrapper(wrap.TrainOneStepCell(_network, opt))
    _train_net.set_train()
    with SummaryRecord(SUMMARY_DIR, file_suffix="_MS_GRAPH", network=_train_net) as summary_writer:
        for epoch in range(0, epoch_size):
            print(f"epoch {epoch}")
            output = _train_net(Tensor(input_np), Tensor(label_np))
            summary_writer.record(epoch)
            print("********output***********")
            print(output.asnumpy())
Example #8
def train_and_eval(config):
    """
    train_and_eval.
    """
    data_path = config.data_path
    epochs = config.epochs
    print("epochs is {}".format(epochs))

    ds_train = create_dataset(data_path,
                              train_mode=True,
                              epochs=1,
                              batch_size=config.batch_size,
                              is_tf_dataset=config.is_tf_dataset)
    ds_eval = create_dataset(data_path,
                             train_mode=False,
                             epochs=1,
                             batch_size=config.batch_size,
                             is_tf_dataset=config.is_tf_dataset)

    print("ds_train.size: {}".format(ds_train.get_dataset_size()))
    print("ds_eval.size: {}".format(ds_eval.get_dataset_size()))

    net_builder = ModelBuilder()

    train_net, eval_net = net_builder.get_net(config)
    train_net.set_train()
    auc_metric = AUCMetric()

    model = Model(train_net,
                  eval_network=eval_net,
                  metrics={"auc": auc_metric})

    eval_callback = EvalCallBack(model, ds_eval, auc_metric, config)
    callback = LossCallBack(config)
    ckptconfig = CheckpointConfig(
        save_checkpoint_steps=ds_train.get_dataset_size(),
        keep_checkpoint_max=10)
    ckpoint_cb = ModelCheckpoint(prefix='widedeep_train',
                                 directory=config.ckpt_path,
                                 config=ckptconfig)

    model.train(epochs,
                ds_train,
                callbacks=[
                    TimeMonitor(ds_train.get_dataset_size()), eval_callback,
                    callback, ckpoint_cb
                ])
Example #9
def train_and_eval(config):
    """
    test_train_eval
    """
    set_seed(1000)
    data_path = config.data_path
    batch_size = config.batch_size
    epochs = config.epochs
    if config.dataset_type == "tfrecord":
        dataset_type = DataType.TFRECORD
    elif config.dataset_type == "mindrecord":
        dataset_type = DataType.MINDRECORD
    else:
        dataset_type = DataType.H5
    print("epochs is {}".format(epochs))
    ds_train = create_dataset(data_path, train_mode=True, epochs=1,
                              batch_size=batch_size, rank_id=get_rank(),
                              rank_size=get_group_size(), data_type=dataset_type)
    ds_eval = create_dataset(data_path, train_mode=False, epochs=1,
                             batch_size=batch_size, rank_id=get_rank(),
                             rank_size=get_group_size(), data_type=dataset_type)
    print("ds_train.size: {}".format(ds_train.get_dataset_size()))
    print("ds_eval.size: {}".format(ds_eval.get_dataset_size()))

    net_builder = ModelBuilder()

    train_net, eval_net = net_builder.get_net(config)
    train_net.set_train()
    auc_metric = AUCMetric()

    model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric})

    eval_callback = EvalCallBack(model, ds_eval, auc_metric, config)

    callback = LossCallBack(config=config)
    ckptconfig = CheckpointConfig(save_checkpoint_steps=ds_train.get_dataset_size(), keep_checkpoint_max=5)
    ckpoint_cb = ModelCheckpoint(prefix='widedeep_train',
                                 directory=config.ckpt_path + '/ckpt_' + str(get_rank()) + '/',
                                 config=ckptconfig)
    out = model.eval(ds_eval)
    print("=====" * 5 + "model.eval() initialized: {}".format(out))
    callback_list = [TimeMonitor(ds_train.get_dataset_size()), eval_callback, callback]
    if get_rank() == 0:
        callback_list.append(ckpoint_cb)
    model.train(epochs, ds_train,
                callbacks=callback_list,
                sink_size=ds_train.get_dataset_size())
Example #10
def test_train_cifar(epoch_size=10):  # pylint: disable=missing-docstring
    context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL,
                                      gradients_mean=True)
    loss_cb = LossMonitor()
    data_path = os.getenv('DATA_PATH')
    dataset = create_dataset(data_path)
    batch_size = 32
    num_classes = 10
    net = resnet50(batch_size, num_classes)
    loss = SoftmaxCrossEntropyExpand(sparse=True)
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
                   0.01, 0.9)
    model = Model(net, loss_fn=loss, optimizer=opt)
    model.train(epoch_size,
                dataset,
                callbacks=[loss_cb],
                dataset_sink_mode=True)
Example #11
def predict_checke_param(in_str):
    """ predict_checke_param """
    net = LeNet5()  # neural network
    context.set_context(mode=context.GRAPH_MODE)
    model = Model(net)

    a1, a2, b1, b2, b3, b4 = in_str.strip().split()
    a1 = int(a1)
    a2 = int(a2)
    b1 = int(b1)
    b2 = int(b2)
    b3 = int(b3)
    b4 = int(b4)

    nd_data = np.random.randint(a1, a2, [b1, b2, b3, b4])
    input_data = Tensor(nd_data, mindspore.float32)
    model.predict(input_data)
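A hypothetical input string: the first two integers bound the random data, the last four give an NCHW shape matching LeNet5 (1, 1, 32, 32):

predict_checke_param("0 255 1 1 32 32")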
Example #12
def train_net(data_dir, seg_dir, run_distribute, config=None):
    if run_distribute:
        init()
        rank_id = get_rank()
        rank_size = get_group_size()
        parallel_mode = ParallelMode.DATA_PARALLEL
        context.set_auto_parallel_context(parallel_mode=parallel_mode,
                                          device_num=rank_size,
                                          gradients_mean=True)
    else:
        rank_id = 0
        rank_size = 1
    train_dataset = create_dataset(data_path=data_dir, seg_path=seg_dir, config=config,
                                   rank_size=rank_size, rank_id=rank_id, is_training=True)
    # train_dataset = create_dataset_diy()

    train_data_size = train_dataset.get_dataset_size()
    print("train dataset length is:", train_data_size)

    network = UNet3d(config=config)

    loss = SoftmaxCrossEntropyWithLogits()
    # loss = nn.DiceLoss()
    lr = Tensor(dynamic_lr(config, train_data_size), mstype.float32)
    optimizer = nn.Adam(params=network.trainable_params(), learning_rate=lr)
    # scale_manager = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
    network.set_train()

    model = Model(network, loss_fn=loss, optimizer=optimizer)
    # model = Model(network, loss_fn=loss, optimizer=optimizer, loss_scale_manager=scale_manager)

    time_cb = TimeMonitor(data_size=train_data_size)
    loss_cb = LossMonitor()
    ckpt_config = CheckpointConfig(
        save_checkpoint_steps=train_data_size,
        keep_checkpoint_max=config.keep_checkpoint_max)
    ckpoint_cb = ModelCheckpoint(prefix='{}'.format(config.model),
                                 directory='./ckpt_{}/'.format(rank_size),
                                 config=ckpt_config)
    callbacks_list = [loss_cb, time_cb, ckpoint_cb]
    print("============== Starting Training ==============")
    model.train(config.epoch_size,
                train_dataset,
                callbacks=callbacks_list,
                dataset_sink_mode=False)
    print("============== End Training ==============")
Example #13
def test_train_eval(config):
    """
    test_train_eval
    """
    data_path = config.data_path
    batch_size = config.batch_size
    epochs = config.epochs
    ds_train = create_dataset(data_path,
                              train_mode=True,
                              epochs=epochs,
                              batch_size=batch_size)
    ds_eval = create_dataset(data_path,
                             train_mode=False,
                             epochs=epochs + 1,
                             batch_size=batch_size)
    print("ds_train.size: {}".format(ds_train.get_dataset_size()))
    print("ds_eval.size: {}".format(ds_eval.get_dataset_size()))

    net_builder = ModelBuilder()

    train_net, eval_net = net_builder.get_net(config)
    train_net.set_train()
    auc_metric = AUCMetric()

    model = Model(train_net,
                  eval_network=eval_net,
                  metrics={"auc": auc_metric})

    eval_callback = EvalCallBack(model, ds_eval, auc_metric, config)

    callback = LossCallBack(config=config)
    ckptconfig = CheckpointConfig(
        save_checkpoint_steps=ds_train.get_dataset_size(),
        keep_checkpoint_max=5)
    ckpoint_cb = ModelCheckpoint(prefix='widedeep_train',
                                 directory=config.ckpt_path,
                                 config=ckptconfig)

    out = model.eval(ds_eval)
    print("=====" * 5 + "model.eval() initialized: {}".format(out))
    model.train(epochs,
                ds_train,
                callbacks=[
                    TimeMonitor(ds_train.get_dataset_size()), eval_callback,
                    callback, ckpoint_cb
                ])
Example #14
def test_region_based_classification():
    """
    Compute mindspore result.
    """
    np.random.seed(5)
    ori = np.random.rand(4, 4).astype(np.float32)
    labels = np.array([[1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 1, 0],
                       [0, 1, 0, 0]]).astype(np.int32)
    np.random.seed(6)
    adv = np.random.rand(4, 4).astype(np.float32)
    model = Model(Net())
    detector = RegionBasedDetector(model)
    radius = detector.fit(ori, labels)
    detector.set_radius(radius)
    detected_res = detector.detect(adv)
    expected_value = np.array([0, 0, 1, 0])
    assert np.all(detected_res == expected_value)
Example #15
def test_train():
    """distributed training"""
    context.set_context(mode=context.GRAPH_MODE)
    parallel_dataset = FakeData()
    strategy = ((2, 1), (1, 4))
    context.set_auto_parallel_context(parallel_mode=ParallelMode.SEMI_AUTO_PARALLEL,
                                      device_num=8,
                                      strategy_ckpt_save_file="./train_strategy.ckpt")
    network = Net(matmul_size=(96, 16), strategy=strategy)
    net_opt = Momentum(network.trainable_params(), 0.01, 0.9)
    net_loss = SoftmaxCrossEntropyWithLogits(reduction='mean')
    model = Model(network=network, loss_fn=net_loss, optimizer=net_opt)
    ckpt_config = CheckpointConfig(keep_checkpoint_max=1, integrated_save=False)
    global_rank_id = int(os.getenv("RANK_ID"))
    ckpt_path = './rank_{}_ckpt'.format(global_rank_id)
    ckpt_callback = ModelCheckpoint(prefix='parallel', directory=ckpt_path, config=ckpt_config)
    model.train(epoch=2, train_dataset=parallel_dataset, callbacks=[ckpt_callback], dataset_sink_mode=False)
    context.reset_auto_parallel_context()
Example #16
def test_double_subgraphs_train():
    context.set_context(save_graphs=True)
    context.set_auto_parallel_context(device_num=1, global_rank=0)
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net = TrainStepWarp(NetWithLoss(Net()))

    batch_ids = np.ones([8, 8, 8, 8]).astype(np.int32)
    ds_train = DatasetLenet(Tensor(batch_ids), None)
    model = Model(net)
    model.train(1, ds_train, dataset_sink_mode=False)
    strategies = _executor._get_shard_strategy(net)
    expected_strategies = {'Default/network-NetWithLoss/ReduceMean-op3': [[1, 1, 1, 1]],
                           'Default/network-NetWithLoss/net-Net/ReLU-op4': [[1, 1, 1, 1]],
                           'Default/network-NetWithLoss/net-Net/Mul-op5': [[1, 1, 1, 1], [1, 1, 1, 1]],
                           'Default/network-NetWithLoss/net-Net/Mul-op6': [[1, 1, 1, 1], [1, 1, 1, 1]],
                           'Default/network-NetWithLoss/net-Net/Cast-op1': [[1, 1, 1, 1]],
                           'Default/network-NetWithLoss/ReduceSum-op7': [[1, 1, 1, 1]]}
    assert strategies == expected_strategies
Example #17
def test_lenet5_train_step_training_pynative():
    """test_lenet5_train_step_training_pynative"""
    context.set_context(mode=context.PYNATIVE_MODE)
    context.reset_auto_parallel_context()
    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                      device_num=8,
                                      mirror_mean=True)
    predict = Tensor(np.ones([1, 1, 32, 32]).astype(np.float32) * 0.01)
    label = Tensor(np.zeros([1, 10]).astype(np.float32))
    DatasetLenet(predict, label, 2)
    network = LeNet5()
    loss_fn = nn.SoftmaxCrossEntropyWithLogits()
    optimizer = Momentum(network.get_parameters(),
                         learning_rate=0.1,
                         momentum=0.9)
    Model(network=network, loss_fn=loss_fn, optimizer=optimizer)
    context.set_context(mode=context.GRAPH_MODE)
    context.reset_auto_parallel_context()
Example #18
def test_inference():
    """distributed inference after distributed training"""
    context.set_context(mode=context.GRAPH_MODE)
    init(backend_name="hccl")
    context.set_auto_parallel_context(
        full_batch=True,
        parallel_mode="semi_auto_parallel",
        strategy_ckpt_load_file="./train_strategy.ckpt",
        device_num=8)

    predict_data = create_predict_data()
    network = Net(matmul_size=(96, 16))
    model = Model(network)
    predict_layout = model.infer_predict_layout(Tensor(predict_data))
    ckpt_file_list = create_ckpt_file_list()
    load_distributed_checkpoint(network, ckpt_file_list, predict_layout)
    predict_result = model.predict(Tensor(predict_data))
    print(predict_result)
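The create_ckpt_file_list helper is not shown; a minimal sketch, assuming one checkpoint per rank from the earlier distributed training run (the file name suffix is a placeholder):

def create_ckpt_file_list(device_num=8):
    # Assumed helper: collect the per-rank checkpoints, ordered by rank id.
    return ['./rank_{}_ckpt/parallel-2_1.ckpt'.format(rank) for rank in range(device_num)]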
Example #19
def train():
    options_file = 'tests/fixtures/model/options.json'
    train_data = './tests/fixtures/train/data.txt'
    vocab_path = './tests/fixtures/train/vocab.txt'
    with open(options_file, 'r') as fin:
        options = json.load(fin)
    lr = 0.2
    epoch = 2

    lm = LanguageModel(options=options, training=True)
    opt = nn.Adagrad(lm.trainable_params(), learning_rate=lr)

    data = get_data(options, train_data, vocab_path)
    dataset = create_elmo_dataset(batch_size=options['batch_size'], data_file_path='tests/fixtures/train.mindrecord')
    
    train_one_step = ElmoTrainOnestepWithLoss(lm, opt)
    model = Model(train_one_step)
    model.train(epoch, dataset)
Example #20
def test_net(data_dir, ckpt_path, cross_valid_ind=1, cfg=None):
    if cfg['model'] == 'unet_medical':
        net = UNetMedical(n_channels=cfg['num_channels'],
                          n_classes=cfg['num_classes'])
    elif cfg['model'] == 'unet_nested':
        net = NestedUNet(in_channel=cfg['num_channels'],
                         n_class=cfg['num_classes'],
                         use_deconv=cfg['use_deconv'],
                         use_bn=cfg['use_bn'],
                         use_ds=False)
    elif cfg['model'] == 'unet_simple':
        net = UNet(in_channel=cfg['num_channels'], n_class=cfg['num_classes'])
    else:
        raise ValueError("Unsupported model: {}".format(cfg['model']))
    param_dict = load_checkpoint(ckpt_path)
    load_param_into_net(net, param_dict)
    net = UnetEval(net)
    if 'dataset' in cfg and cfg['dataset'] == "Cell_nuclei":
        valid_dataset = create_cell_nuclei_dataset(
            data_dir,
            cfg['img_size'],
            1,
            1,
            is_train=False,
            eval_resize=cfg["eval_resize"],
            split=0.8)
    else:
        _, valid_dataset = create_dataset(data_dir,
                                          1,
                                          1,
                                          False,
                                          cross_valid_ind,
                                          False,
                                          do_crop=cfg['crop'],
                                          img_size=cfg['img_size'])
    model = Model(net,
                  loss_fn=TempLoss(),
                  metrics={"dice_coeff": dice_coeff()})

    print("============== Starting Evaluating ============")
    eval_score = model.eval(valid_dataset,
                            dataset_sink_mode=False)["dice_coeff"]
    print("============== Cross valid dice coeff is:", eval_score[0])
    print("============== Cross valid IOU is:", eval_score[1])
Example #21
def test_train_cifar(epoch_size=10):
    """train net"""
    context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL,
                                      gradients_mean=True)
    context.set_auto_parallel_context(pipeline_stages=2, full_batch=True)
    loss_cb = LossMonitor()
    data_path = os.getenv('DATA_PATH')
    dataset = create_dataset(data_path)
    batch_size = 32
    num_classes = 10
    net = resnet50(batch_size, num_classes)
    loss = SoftmaxCrossEntropyExpand(sparse=True)
    net_with_grads = nn.PipelineCell(nn.WithLossCell(net, loss), 4)
    opt = Momentum(net.infer_param_pipeline_stage(), 0.01, 0.9)
    model = Model(net_with_grads, optimizer=opt)
    model.train(epoch_size,
                dataset,
                callbacks=[loss_cb],
                dataset_sink_mode=True)
Example #22
def arithmetic_operator_base(symbol):
    """ arithmetic_operator_base """
    input_np = np.random.randn(2, 3, 4, 5).astype(np.float32)
    input_me = Tensor(input_np)
    logical_operator = {
        "++": 1,
        "--": 2,
        "+": 3,
        "-": 4,
        "*": 5,
        "/": 6,
        "%": 7,
        "not": 8
    }
    x = logical_operator[symbol]
    net = arithmetic_Net(x)
    context.set_context(mode=context.GRAPH_MODE)
    model = Model(net)
    model.predict(input_me)
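Example call; any key of the operator table above is a valid argument:

arithmetic_operator_base("+")   # exercises the branch selected by index 3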
Example #23
def test_eval(config):
    """
    test evaluate
    """
    data_path = config.data_path
    ckpt_path = config.ckpt_path
    batch_size = config.batch_size
    if config.dataset_type == "tfrecord":
        dataset_type = DataType.TFRECORD
    elif config.dataset_type == "mindrecord":
        dataset_type = DataType.MINDRECORD
    else:
        dataset_type = DataType.H5

    # data upload
    print('Upload data from obs to modelarts server.')
    mox.file.copy_parallel(src_url=config.data_url, dst_url=data_path)
    mox.file.copy_parallel(src_url=config.ckpt_url, dst_url=ckpt_path)

    tar_file = data_path + "train_demo.tar.gz"
    untar(tar_file, data_path)
    data_path = data_path + config.dataset_type

    ds_eval = create_dataset(data_path,
                             train_mode=False,
                             epochs=1,
                             batch_size=batch_size,
                             data_type=dataset_type)
    print("ds_eval.size: {}".format(ds_eval.get_dataset_size()))

    net_builder = ModelBuilder()
    train_net, eval_net = net_builder.get_net(config)
    param_dict = load_checkpoint(find_ckpt(ckpt_path))
    load_param_into_net(eval_net, param_dict)

    auc_metric = AUCMetric()
    model = Model(train_net,
                  eval_network=eval_net,
                  metrics={"auc": auc_metric})

    eval_callback = EvalCallBack(model, ds_eval, auc_metric, config)

    model.eval(ds_eval, callbacks=eval_callback)
def set_env(mode="GPU", device_id=0, ckpt_path="/datasets/pretrained_weights/ms_model_small.ckpt"):
    context.set_context(mode=context.GRAPH_MODE,
                        device_target=mode, device_id=device_id)
    context.set_auto_parallel_context(parallel_mode="stand_alone")
    print('set context as: {}, using device {}.'.format(mode, device_id))

    config = GPT2Config(
        batch_size=1,
        seq_length=1024,
        vocab_size=50257,
        d_model=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout=0.1,
        attention_dropout=0.1,
        max_position_embeddings=1024,
        initializer_range=0.02,
        input_mask_from_dataset=True,
        dtype=mstype.float32,
        compute_type=mstype.float32,
    )

    gpt2_loss = GPT2SummarizationModel(config=config,
                                       is_training=False,
                                       use_one_hot_embeddings=False)
    load_checkpoint_path = ckpt_path
    gpt2_loss.set_train(False)
    param_dict = load_checkpoint(load_checkpoint_path)

    param_dict_ = {}

    print("====process param_dict========")
    for msname in param_dict:
        param_dict_['gpt2.'+msname] = param_dict[msname]
    param_dict_['lm_head.weight'] = param_dict['gpt2_embedding_lookup.embedding_table']
    print("====load params into model====")
    load_param_into_net(gpt2_loss, param_dict_)

    model = Model(gpt2_loss)
    return model, config
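A hypothetical call, relying on the default checkpoint path from the signature:

model, config = set_env(mode="GPU", device_id=0)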
Example #25
def test_different_args_run():
    """ test_different_args_run """
    np1 = np.random.randn(2, 3, 4, 5).astype(np.float32)
    input_me1 = Tensor(np1)
    np2 = np.random.randn(2, 3, 4, 5).astype(np.float32)
    input_me2 = Tensor(np2)

    net = Net2()
    net = add_flags(net, predit=True)
    context.set_context(mode=context.GRAPH_MODE)
    model = Model(net)
    me1 = model.predict(input_me1)
    me2 = model.predict(input_me2)
    out_me1 = me1.asnumpy()
    out_me2 = me2.asnumpy()
    print(np1)
    print(np2)
    print(out_me1)
    print(out_me2)
    assert not np.allclose(out_me1, out_me2, 0.01, 0.01)
Example #26
def test_net(data_dir, seg_dir, ckpt_path, config=None):
    eval_dataset = create_dataset(data_path=data_dir,
                                  seg_path=seg_dir,
                                  config=config,
                                  is_training=False)
    eval_data_size = eval_dataset.get_dataset_size()
    print("train dataset length is:", eval_data_size)

    network = UNet3d(config=config)
    network.set_train(False)
    param_dict = load_checkpoint(ckpt_path)
    load_param_into_net(network, param_dict)
    model = Model(network)
    index = 0
    total_dice = 0
    for batch in eval_dataset.create_dict_iterator(num_epochs=1,
                                                   output_numpy=True):
        image = batch["image"]
        seg = batch["seg"]
        print("current image shape is {}".format(image.shape), flush=True)
        sliding_window_list, slice_list = create_sliding_window(
            image, config.roi_size, config.overlap)
        image_size = (config.batch_size, config.num_classes) + image.shape[2:]
        output_image = np.zeros(image_size, np.float32)
        count_map = np.zeros(image_size, np.float32)
        importance_map = np.ones(config.roi_size, np.float32)
        for window, slice_ in zip(sliding_window_list, slice_list):
            window_image = Tensor(window, mstype.float32)
            pred_probs = model.predict(window_image)
            output_image[slice_] += pred_probs.asnumpy()
            count_map[slice_] += importance_map
        output_image = output_image / count_map
        dice, _ = CalculateDice(output_image, seg)
        print("The {} batch dice is {}".format(index, dice), flush=True)
        total_dice += dice
        index = index + 1
    avg_dice = total_dice / eval_data_size
    print(
        "**********************End Eval***************************************"
    )
    print("eval average dice is {}".format(avg_dice))
Example #27
def train_and_eval(config):
    """
    test_train_eval
    """
    set_seed(1000)
    data_path = config.data_path
    batch_size = config.batch_size
    epochs = config.epochs
    if config.dataset_type == "tfrecord":
        dataset_type = DataType.TFRECORD
    elif config.dataset_type == "mindrecord":
        dataset_type = DataType.MINDRECORD
    else:
        dataset_type = DataType.H5
    parameter_server = bool(config.parameter_server)
    cache_enable = config.vocab_cache_size > 0
    print("epochs is {}".format(epochs))
    ds_train = create_dataset(data_path, train_mode=True, epochs=1,
                              batch_size=batch_size, data_type=dataset_type)
    ds_eval = create_dataset(data_path, train_mode=False, epochs=1,
                             batch_size=batch_size, data_type=dataset_type)
    print("ds_train.size: {}".format(ds_train.get_dataset_size()))
    print("ds_eval.size: {}".format(ds_eval.get_dataset_size()))

    net_builder = ModelBuilder()

    train_net, eval_net = net_builder.get_net(config)
    train_net.set_train()
    auc_metric = AUCMetric()

    model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric})

    eval_callback = EvalCallBack(model, ds_eval, auc_metric, config)
    callback = LossCallBack(config=config)
    ckptconfig = CheckpointConfig(save_checkpoint_steps=ds_train.get_dataset_size(), keep_checkpoint_max=5)
    ckpoint_cb = ModelCheckpoint(prefix='widedeep_train', directory=config.ckpt_path, config=ckptconfig)
    callback_list = [TimeMonitor(ds_train.get_dataset_size()), eval_callback, callback, ckpoint_cb]

    model.train(epochs, ds_train,
                callbacks=callback_list,
                dataset_sink_mode=(parameter_server and cache_enable))
Example #28
def test_deeplabv3_1p():
    start_time = time.time()
    epoch_size = 100
    args_opt = argparse.Namespace(base_size=513, crop_size=513, batch_size=2)
    args_opt.base_size = config.crop_size
    args_opt.crop_size = config.crop_size
    args_opt.batch_size = config.batch_size
    train_dataset = create_dataset(args_opt,
                                   data_url,
                                   1,
                                   config.batch_size,
                                   usage="eval")
    dataset_size = train_dataset.get_dataset_size()
    callback = LossCallBack(dataset_size)
    net = deeplabv3_resnet50(
        config.seg_num_classes,
        [config.batch_size, 3, args_opt.crop_size, args_opt.crop_size],
        infer_scale_sizes=config.eval_scales,
        atrous_rates=config.atrous_rates,
        decoder_output_stride=config.decoder_output_stride,
        output_stride=config.output_stride,
        fine_tune_batch_norm=config.fine_tune_batch_norm,
        image_pyramid=config.image_pyramid)
    net.set_train()
    model_fine_tune(net, 'layer')
    loss = OhemLoss(config.seg_num_classes, config.ignore_label)
    opt = Momentum(filter(
        lambda x: 'beta' not in x.name and 'gamma' not in x.name and 'depth'
        not in x.name and 'bias' not in x.name, net.trainable_params()),
                   learning_rate=config.learning_rate,
                   momentum=config.momentum,
                   weight_decay=config.weight_decay)
    model = Model(net, loss, opt)
    model.train(epoch_size, train_dataset, callback)
    print(time.time() - start_time)
    print("expect loss: ", callback.loss)
    print("expect time: ", callback.time)
    expect_loss = 0.92
    expect_time = 43
    assert callback.loss.asnumpy() <= expect_loss
    assert callback.time <= expect_time
Example #29
def test_eval(config):
    """
    test evaluate
    """
    data_path = config.data_path
    batch_size = config.batch_size
    ds_eval = create_dataset(data_path, train_mode=False, epochs=2,
                             batch_size=batch_size)
    print("ds_eval.size: {}".format(ds_eval.get_dataset_size()))

    net_builder = ModelBuilder()
    train_net, eval_net = net_builder.get_net(config)

    param_dict = load_checkpoint(config.ckpt_path)
    load_param_into_net(eval_net, param_dict)

    auc_metric = AUCMetric()
    model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric})

    eval_callback = EvalCallBack(model, ds_eval, auc_metric, config)

    model.eval(ds_eval, callbacks=eval_callback)
Example #30
def test_double_subgraphs_train():
    context.set_context(save_graphs=True)
    context.set_auto_parallel_context(device_num=1, global_rank=0)
    context.set_auto_parallel_context(parallel_mode="auto_parallel")
    net = TrainStepWarp(NetWithLoss(Net()))

    batch_ids = np.ones([8, 8, 8, 8]).astype(np.int32)
    ds_train = DatasetLenet(Tensor(batch_ids), None)
    model = Model(net)
    model.train(1, ds_train, dataset_sink_mode=False)
    strategies = _executor._get_shard_strategy(net)
    for (k, v) in strategies.items():
        if re.search('ReduceMean-op', k) is not None:
            assert v == [[1, 1, 1, 1]]
        elif re.search('ReLU-op', k) is not None:
            assert v == [[1, 1, 1, 1]]
        elif re.search('Mul-op', k) is not None:
            assert v == [[1, 1, 1, 1], [1, 1, 1, 1]]
        elif re.search('Cast-op', k) is not None:
            assert v == [[1, 1, 1, 1]]
        elif re.search('ReduceSum-op', k) is not None:
            assert v == [[1, 1, 1, 1]]