Example 1
    def test_prune(self):
        main_program = fluid.Program()
        startup_program = fluid.Program()
        #   X       X              O       X              O
        # conv1-->conv2-->sum1-->conv3-->conv4-->sum2-->conv5-->conv6
        #     |            ^ |                    ^
        #     |____________| |____________________|
        #
        # X: prune output channels
        # O: prune input channels
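        # Channels removed from a conv's output must also be removed from the
        # input of every op that consumes it; because sum1 and sum2 add two
        # branches elementwise, the convs feeding the same sum must be pruned
        # on the same output channels.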
        with fluid.unique_name.guard():
            with fluid.program_guard(main_program, startup_program):
                input = fluid.data(name="image", shape=[None, 3, 16, 16])
                label = fluid.data(name='label',
                                   shape=[None, 1],
                                   dtype='int64')
                conv1 = conv_bn_layer(input, 8, 3, "conv1", act='relu')
                conv2 = conv_bn_layer(conv1, 8, 3, "conv2", act='leaky_relu')
                sum1 = conv1 + conv2
                conv3 = conv_bn_layer(sum1, 8, 3, "conv3", act='relu6')
                conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
                sum2 = conv4 + sum1
                conv5 = conv_bn_layer(sum2, 8, 3, "conv5")

                flag = fluid.layers.fill_constant([1], value=1, dtype='int32')
                rand_flag = paddle.randint(2, dtype='int32')
                cond = fluid.layers.less_than(x=flag, y=rand_flag)
                cond_output = fluid.layers.create_global_var(
                    shape=[1],
                    value=0.0,
                    dtype='float32',
                    persistable=False,
                    name='cond_output')

                def cond_block1():
                    cond_conv = conv_bn_layer(conv5, 8, 3, "conv_cond1_1")
                    fluid.layers.assign(input=cond_conv, output=cond_output)

                def cond_block2():
                    cond_conv1 = conv_bn_layer(conv5, 8, 3, "conv_cond2_1")
                    cond_conv2 = conv_bn_layer(cond_conv1, 8, 3,
                                               "conv_cond2_2")
                    fluid.layers.assign(input=cond_conv2, output=cond_output)

                fluid.layers.cond(cond, cond_block1, cond_block2)
                sum3 = fluid.layers.sum([sum2, cond_output])

                conv6 = conv_bn_layer(sum3, 8, 3, "conv6")
                sub1 = conv6 - sum3
                mult = sub1 * sub1
                conv7 = conv_bn_layer(mult,
                                      8,
                                      3,
                                      "Depthwise_Conv7",
                                      groups=8,
                                      use_cudnn=False)
                floored = fluid.layers.floor(conv7)
                scaled = fluid.layers.scale(floored)
                concated = fluid.layers.concat([scaled, mult], axis=1)
                conv8 = conv_bn_layer(concated, 8, 3, "conv8")
                predict = fluid.layers.fc(input=conv8, size=10, act='softmax')
                cost = fluid.layers.cross_entropy(input=predict, label=label)
                adam_optimizer = fluid.optimizer.AdamOptimizer(0.01)
                avg_cost = fluid.layers.mean(cost)
                adam_optimizer.minimize(avg_cost)

        params = []
        for param in main_program.all_parameters():
            if 'conv' in param.name:
                params.append(param.name)
        # TODO: support pruning the convolution before the fc layer.
        params.remove('conv8_weights')

        place = fluid.CUDAPlace(0)
        exe = fluid.Executor(place)
        exe.run(startup_program)
        x = np.random.random(size=(10, 3, 16, 16)).astype('float32')
        label = np.random.random(size=(10, 1)).astype('int64')
        loss_data, = exe.run(main_program,
                             feed={
                                 "image": x,
                                 "label": label
                             },
                             fetch_list=[cost.name])
        pruner = Pruner()
        main_program, _, _ = pruner.prune(main_program,
                                          fluid.global_scope(),
                                          params=params,
                                          ratios=[0.5] * len(params),
                                          place=place,
                                          lazy=False,
                                          only_graph=False,
                                          param_backup=None,
                                          param_shape_backup=None)

        loss_data, = exe.run(main_program,
                             feed={
                                 "image": x,
                                 "label": label
                             },
                             fetch_list=[cost.name])
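A minimal follow-up sketch (not part of the original test): after `pruner.prune` runs with ratio 0.5, the pruned convolution weights in the global scope should have had their output channels halved (8 -> 4). This reads the shapes back for inspection and assumes the same `params` list and executor scope as above.

import numpy as np
import paddle.fluid as fluid

# Inspect the pruned parameter shapes left in the global scope by the test above.
for param_name in params:
    var = fluid.global_scope().find_var(param_name)
    if var is not None:
        shape = np.array(var.get_tensor()).shape
        print(param_name, shape)  # e.g. conv1_weights: (4, 3, 3, 3) after 0.5 pruning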
Example 2
def fast_infer(args):
    """
    Inference by beam search decoder based solely on Fluid operators.
    """
    out_ids, out_scores, pyreader = fast_decoder(
        ModelHyperParams.src_vocab_size,
        ModelHyperParams.trg_vocab_size,
        ModelHyperParams.phone_vocab_size,
        ModelHyperParams.max_length + 1,
        ModelHyperParams.n_layer,
        ModelHyperParams.n_head,
        ModelHyperParams.d_key,
        ModelHyperParams.d_value,
        ModelHyperParams.d_model,
        ModelHyperParams.d_inner_hid,
        ModelHyperParams.prepostprocess_dropout,
        ModelHyperParams.attention_dropout,
        ModelHyperParams.relu_dropout,
        ModelHyperParams.preprocess_cmd,
        ModelHyperParams.postprocess_cmd,
        ModelHyperParams.weight_sharing,
        InferTaskConfig.beam_size,
        InferTaskConfig.max_out_len,
        ModelHyperParams.bos_idx,
        ModelHyperParams.eos_idx,
        beta=ModelHyperParams.beta,
        use_py_reader=args.use_py_reader)

    # This is used here to set dropout to the test mode.
    infer_program = fluid.default_main_program().clone(for_test=True)

    if InferTaskConfig.use_gpu:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    fluid.io.load_vars(exe,
                       InferTaskConfig.model_path,
                       vars=[
                           var for var in infer_program.list_vars()
                           if isinstance(var, fluid.framework.Parameter)
                       ])

    exec_strategy = fluid.ExecutionStrategy()
    # For faster executor
    exec_strategy.use_experimental_executor = True
    exec_strategy.num_threads = 1
    build_strategy = fluid.BuildStrategy()
    infer_exe = fluid.ParallelExecutor(use_cuda=TrainTaskConfig.use_gpu,
                                       main_program=infer_program,
                                       build_strategy=build_strategy,
                                       exec_strategy=exec_strategy)

    # data reader settings for inference
    args.train_file_pattern = args.test_file_pattern
    args.use_token_batch = False
    args.sort_type = reader.SortType.NONE
    args.shuffle = False
    args.shuffle_batch = False
    test_data = prepare_data_generator(
        args,
        is_test=False,
        count=dev_count,
        pyreader=pyreader,
        py_reader_provider_wrapper=py_reader_provider_wrapper,
        place=place)
    if args.use_py_reader:
        pyreader.start()
        data_generator = None
    else:
        data_generator = test_data()
    trg_idx2word = reader.DataReader.load_dict(dict_path=args.trg_vocab_fpath,
                                               reverse=True)

    while True:
        try:
            feed_dict_list = prepare_feed_dict_list(data_generator, dev_count,
                                                    place)
            if args.use_parallel_exe:
                seq_ids, seq_scores = infer_exe.run(
                    fetch_list=[out_ids.name, out_scores.name],
                    feed=feed_dict_list,
                    return_numpy=False)
            else:
                seq_ids, seq_scores = exe.run(
                    program=infer_program,
                    fetch_list=[out_ids.name, out_scores.name],
                    feed=feed_dict_list[0]
                    if feed_dict_list is not None else None,
                    return_numpy=False,
                    use_program_cache=True)
            if isinstance(seq_ids, paddle.fluid.LoDTensor):
                seq_ids_list, seq_scores_list = [seq_ids], [seq_scores]
            else:
                seq_ids_list, seq_scores_list = seq_ids, seq_scores
            for seq_ids, seq_scores in zip(seq_ids_list, seq_scores_list):
                # How to parse the results:
                #   Suppose the lod of seq_ids is:
                #     [[0, 3, 6], [0, 12, 24, 40, 54, 67, 82]]
                #   then from lod[0]:
                #     there are 2 source sentences, beam width is 3.
                #   from lod[1]:
                #     the first source sentence has 3 hyps; the lengths are 12, 12, 16
                #     the second source sentence has 3 hyps; the lengths are 14, 13, 15
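                #   For example, with the lod above the token ids of the first
                #   hypothesis of the first source sentence are
                #   np.array(seq_ids)[0:12], the second hypothesis is [12:24],
                #   and the third is [24:40].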
                hyps = [[] for i in range(len(seq_ids.lod()[0]) - 1)]
                scores = [[] for i in range(len(seq_scores.lod()[0]) - 1)]
                for i in range(len(seq_ids.lod()[0]) -
                               1):  # for each source sentence
                    start = seq_ids.lod()[0][i]
                    end = seq_ids.lod()[0][i + 1]
                    for j in range(end - start):  # for each candidate
                        sub_start = seq_ids.lod()[1][start + j]
                        sub_end = seq_ids.lod()[1][start + j + 1]
                        hyps[i].append(" ".join([
                            trg_idx2word[idx] for idx in post_process_seq(
                                np.array(seq_ids)[sub_start:sub_end])
                        ]))
                        scores[i].append(np.array(seq_scores)[sub_end - 1])
                        print(hyps[i][-1])
                        if len(hyps[i]) >= InferTaskConfig.n_best:
                            break
        except (StopIteration, fluid.core.EOFException):
            # The data pass is over.
            if args.use_py_reader:
                pyreader.reset()
            break
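The helper `post_process_seq` used above is defined elsewhere in the Transformer example. A minimal sketch consistent with how it is called here (a single positional argument) might look like the following; the default bos/eos indices taken from ModelHyperParams are assumptions, not the original signature.

def post_process_seq(seq,
                     bos_idx=ModelHyperParams.bos_idx,
                     eos_idx=ModelHyperParams.eos_idx):
    # Keep tokens up to the first <eos> and drop the special <bos>/<eos> ids.
    eos_pos = len(seq)
    for i, idx in enumerate(seq):
        if idx == eos_idx:
            eos_pos = i
            break
    return [idx for idx in seq[:eos_pos] if idx not in (bos_idx, eos_idx)]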
Example 3
def do_train(args):

    train_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()

    with fluid.program_guard(train_prog, startup_prog):
        train_prog.random_seed = args.random_seed
        startup_prog.random_seed = args.random_seed

        with fluid.unique_name.guard():

            # define input and reader

            input_slots = [{
                "name": "src_ids",
                "shape": (-1, args.max_seq_len, 1),
                "dtype": "int64"
            }, {
                "name": "pos_ids",
                "shape": (-1, args.max_seq_len, 1),
                "dtype": "int64"
            }, {
                "name": "sent_ids",
                "shape": (-1, args.max_seq_len, 1),
                "dtype": "int64"
            }, {
                "name": "input_mask",
                "shape": (-1, args.max_seq_len, 1),
                "dtype": "float32"
            }, {
                "name": "input_span_mask",
                "shape": (-1, args.max_seq_len),
                "dtype": "float32"
            }, {
                "name": "start_positions",
                "shape": (-1, 1),
                "dtype": "int64"
            }, {
                "name": "end_positions",
                "shape": (-1, 1),
                "dtype": "int64"
            }, {
                "name": "is_null_answer",
                "shape": (-1, 1),
                "dtype": "int64"
            }]

            input_field = InputField(input_slots)
            input_field.build(build_pyreader=True)

            # define the network

            loss = create_net(is_training=True,
                              model_input=input_field,
                              args=args)

            loss.persistable = True

            # define the optimizer

            if args.use_cuda:
                dev_count = fluid.core.get_cuda_device_count()
            else:
                dev_count = int(
                    os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

            # since we need the max number of training steps to set up warmup,
            # we define the data processor in advance;
            # usually the data processor can be declared later, outside the program_guard scope

            processor = DataProcessor(vocab_path=args.vocab_path,
                                      do_lower_case=args.do_lower_case,
                                      max_seq_length=args.max_seq_len,
                                      in_tokens=args.in_tokens,
                                      doc_stride=args.doc_stride,
                                      do_stride=args.do_stride,
                                      max_query_length=args.max_query_len)

            ## define the data generator
            batch_generator = processor.data_generator(
                data_path=args.training_file,
                batch_size=args.batch_size,
                phase="train",
                shuffle=True,
                dev_count=dev_count,
                epoch=args.epoch)

            num_train_examples = processor.get_num_examples(phase='train')
            max_train_steps = args.epoch * num_train_examples // dev_count // args.batch_size
            warmup_steps = int(max_train_steps * args.warmup_proportion)

            print(max_train_steps, warmup_steps, num_train_examples)

            optimizor = optimization(loss=loss,
                                     warmup_steps=warmup_steps,
                                     num_train_steps=max_train_steps,
                                     learning_rate=args.learning_rate,
                                     train_program=train_prog,
                                     startup_prog=startup_prog,
                                     weight_decay=args.weight_decay,
                                     scheduler=args.lr_scheduler,
                                     use_fp16=args.use_fp16,
                                     loss_scaling=args.loss_scaling)

    # prepare training

    ## decorate the pyreader with batch_generator
    input_field.reader.decorate_batch_generator(batch_generator)

    ## define the executor and program for training

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    exe = fluid.Executor(place)

    exe.run(startup_prog)

    assert (args.init_from_checkpoint == "") or (args.init_from_pretrain_model
                                                 == "")

    ## init from a checkpoint to resume the previous training
    if args.init_from_checkpoint:
        init_from_checkpoint(args, exe, train_prog)

    ## init from a pretrained model to better solve the current task
    if args.init_from_pretrain_model:
        init_from_pretrain_model(args, exe, train_prog)

    build_strategy = fluid.compiler.BuildStrategy()
    build_strategy.enable_inplace = True

    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)

    # start training

    step = 0
    for epoch_step in range(args.epoch):
        input_field.reader.start()
        while True:
            try:

                # fetch the loss only every print_step steps to minimize fetch ops and speed up training
                if step % args.print_step == 0:
                    fetch_list = [loss.name]
                else:
                    fetch_list = []

                output = exe.run(compiled_train_prog, fetch_list=fetch_list)

                if step % args.print_step == 0:
                    print("step: %d, loss: %.4f" % (step, np.sum(output[0])))

                if step % args.save_step == 0 and step != 0:

                    if args.save_checkpoint:
                        save_checkpoint(args, exe, train_prog,
                                        "step_" + str(step))

                    if args.save_param:
                        save_param(args, exe, train_prog, "step_" + str(step))

                step += 1

            except fluid.core.EOFException:
                input_field.reader.reset()
                break

    if args.save_checkpoint:
        save_checkpoint(args, exe, train_prog, "step_final")

    if args.save_param:
        save_param(args, exe, train_prog, "step_final")
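`save_checkpoint` and `save_param` are project helpers not shown here. A plausible minimal sketch of the checkpoint variant, assuming a `checkpoints` output-directory argument (an assumed name), is:

import os
import paddle.fluid as fluid

def save_checkpoint(args, exe, program, dirname):
    # Sketch only: persist every persistable variable of the training program
    # under <args.checkpoints>/<dirname>; the repository's real helper may save
    # additional training state.  args.checkpoints is an assumed argument name.
    checkpoint_dir = os.path.join(args.checkpoints, dirname)
    if not os.path.isdir(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    fluid.io.save_persistables(exe, checkpoint_dir, main_program=program)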
Example 4
def fit():
    role = role_maker.UserDefinedRoleMaker(
        current_id=current_id,
        role=role_maker.Role.WORKER if int(roles) == 1 else role_maker.Role.SERVER,
        worker_num=2,
        server_endpoints=["127.0.0.1:36011"])
    fleet.init(role)
    BATCH_SIZE = 128
    type_size = createDataList(model_file_path, model_file_path + '.data' + "/")
    # data readers used for training and testing
    train_reader = paddle.batch(
        reader=paddle.reader.shuffle(reader=dataReader(in_file_path + ".data/trainer.list"),
                                     buf_size=BATCH_SIZE * 100),
        batch_size=BATCH_SIZE)
    test_reader = paddle.batch(
        reader=paddle.reader.shuffle(reader=dataReader(in_file_path + ".data/test.list"),
                                     buf_size=BATCH_SIZE * 100),
        batch_size=BATCH_SIZE)
    data_shape = [3, 32, 32]
    images = fluid.layers.data(name='images', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # build the classifier
    predict = networkConfiguration(images,type_size)

    # define the loss function and accuracy
    cost = fluid.layers.cross_entropy(input=predict, label=label)   # cross entropy
    avg_cost = fluid.layers.mean(cost)                              # mean over all elements of cost
    acc = fluid.layers.accuracy(input=predict, label=label)         # accuracy from prediction and label

    # clone the test program before adding the optimizer
    test_program = fluid.default_main_program().clone(for_test=True)
    # define the optimization method
    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
    strategy = DistributeTranspilerConfig()
    strategy.sync_mode = True
    optimizer = fleet.distributed_optimizer(optimizer, strategy)
    optimizer.minimize(avg_cost)

    if fleet.is_server():
        print("启动server")
        fleet.init_server()
        fleet.run_server()

    elif fleet.is_worker():
        print("启动worker")
        ########## 模型训练&模型评估 ##########
        # 创建Executor
        use_cuda = False # 定义使用CPU还是GPU,使用CPU时use_cuda=False
        place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
        print("cpu")
        # 定义数据映射器
        feeder = fluid.DataFeeder(feed_list=[images, label], place=place)
        print("数据映射")
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        fleet.init_worker()
        print(fleet.worker_endpoints())
        for pass_id in range(EPOCH_NUM):
            print(pass_id)
            # start training
            for batch_id, data in enumerate(train_reader()):                            # iterate over train_reader
                train_cost, train_acc = exe.run(program=fluid.default_main_program(),   # run the main program
                                                feed=feeder.feed(data),                 # feed one batch of data
                                                fetch_list=[avg_cost, acc])             # fetch the average cost and accuracy
                # print training info every 20 batches
                if batch_id % 20 == 0:
                    print('Pass:%d, Batch:%d, Cost:%0.5f, Accuracy:%0.5f' %(pass_id, batch_id, train_cost[0], train_acc[0]))
            # start testing
            test_costs = [] # loss values on the test set
            test_accs = []  # accuracies on the test set
            for batch_id, data in enumerate(test_reader()):
                test_cost, test_acc = exe.run(program=test_program,         # run the test program
                                            feed=feeder.feed(data),       # feed the data
                                            fetch_list=[avg_cost, acc])   # fetch the cost and accuracy
                test_costs.append(test_cost[0])                             # record the loss of each batch
                test_accs.append(test_acc[0])                               # record the accuracy of each batch

            test_cost = (sum(test_costs) / len(test_costs)) # average test loss
            test_acc = (sum(test_accs) / len(test_accs))    # average test accuracy
            print('Test:%d, Cost:%0.5f, ACC:%0.5f' % (pass_id, test_cost, test_acc))
        save(predict,model_file_path,exe)
        fleet.stop_worker()
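The `save` helper called at the end is not shown. A hedged sketch that exports an inference model for the trained classifier (the feed name 'images' matches the data layer defined in fit(); the original helper may differ) could be:

import os
import paddle.fluid as fluid

def save(predict, model_path, exe):
    # Sketch only: export an inference program containing just the prediction graph.
    if not os.path.isdir(model_path):
        os.makedirs(model_path)
    fluid.io.save_inference_model(dirname=model_path,
                                  feeded_var_names=['images'],
                                  target_vars=[predict],
                                  executor=exe)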
Example 5
def eval():
    args = parse_args()
    print_arguments(args)
    # check whether the installed paddle is compiled with GPU
    # PointRCNN model can only run on GPU
    check_gpu(True)

    load_config(args.cfg)
    if args.set_cfgs is not None:
        set_config_from_list(args.set_cfgs)

    if not os.path.isdir(args.output_dir):
        os.makedirs(args.output_dir)

    if args.eval_mode == 'rpn':
        cfg.RPN.ENABLED = True
        cfg.RCNN.ENABLED = False
    elif args.eval_mode == 'rcnn':
        cfg.RCNN.ENABLED = True
        cfg.RPN.ENABLED = cfg.RPN.FIXED = True
        assert args.batch_size == 1, "batch size must be 1 in rcnn evaluation"
    elif args.eval_mode == 'rcnn_offline':
        cfg.RCNN.ENABLED = True
        cfg.RPN.ENABLED = False
        assert args.batch_size == 1, "batch size must be 1 in rcnn_offline evaluation"
    else:
        raise NotImplementedError("unknown eval mode: {}".format(
            args.eval_mode))

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)

    # build model
    startup = fluid.Program()
    eval_prog = fluid.Program()
    with fluid.program_guard(eval_prog, startup):
        with fluid.unique_name.guard():
            eval_model = PointRCNN(cfg, args.batch_size, True, 'TEST')
            eval_model.build()
            eval_loader = eval_model.get_loader()
            eval_feeds = eval_model.get_feeds()
            eval_outputs = eval_model.get_outputs()
    eval_prog = eval_prog.clone(True)

    extra_keys = []
    if args.eval_mode == 'rpn':
        extra_keys.extend(['sample_id', 'rpn_cls_label', 'gt_boxes3d'])
        if args.save_rpn_feature:
            extra_keys.extend([
                'pts_rect',
                'pts_features',
                'pts_input',
            ])
    eval_keys, eval_values = parse_outputs(eval_outputs,
                                           prog=eval_prog,
                                           extra_keys=extra_keys)

    eval_compile_prog = fluid.compiler.CompiledProgram(
        eval_prog).with_data_parallel()

    exe.run(startup)

    # load weights
    if not os.path.isdir(args.weights):
        assert os.path.exists("{}.pdparams".format(args.weights)), \
                "Given weights file {}.pdparams does not exist.".format(args.weights)
    fluid.load(eval_prog, args.weights, exe)

    kitti_feature_dir = os.path.join(args.output_dir, 'features')
    kitti_output_dir = os.path.join(args.output_dir, 'detections', 'data')
    seg_output_dir = os.path.join(args.output_dir, 'seg_result')
    if args.save_rpn_feature:
        if os.path.exists(kitti_feature_dir):
            shutil.rmtree(kitti_feature_dir)
        os.makedirs(kitti_feature_dir)
        if os.path.exists(kitti_output_dir):
            shutil.rmtree(kitti_output_dir)
        os.makedirs(kitti_output_dir)
        if os.path.exists(seg_output_dir):
            shutil.rmtree(seg_output_dir)
        os.makedirs(seg_output_dir)

    # make sure these result dirs exist
    roi_output_dir = os.path.join('./result_dir', 'roi_result', 'data')
    refine_output_dir = os.path.join('./result_dir', 'refine_result', 'data')
    final_output_dir = os.path.join("./result_dir", 'final_result', 'data')
    if not os.path.exists(final_output_dir):
        os.makedirs(final_output_dir)
    if args.save_result:
        if not os.path.exists(roi_output_dir):
            os.makedirs(roi_output_dir)
        if not os.path.exists(refine_output_dir):
            os.makedirs(refine_output_dir)

    # get reader
    kitti_rcnn_reader = KittiRCNNReader(
        data_dir=args.data_dir,
        npoints=cfg.RPN.NUM_POINTS,
        split=cfg.TEST.SPLIT,
        mode='EVAL',
        classes=cfg.CLASSES,
        rcnn_eval_roi_dir=args.rcnn_eval_roi_dir,
        rcnn_eval_feature_dir=args.rcnn_eval_feature_dir)
    eval_reader = kitti_rcnn_reader.get_multiprocess_reader(
        args.batch_size, eval_feeds)
    eval_loader.set_sample_list_generator(eval_reader, place)

    thresh_list = [0.1, 0.3, 0.5, 0.7, 0.9]
    queue = multiprocessing.Queue(128)
    mgr = multiprocessing.Manager()
    lock = multiprocessing.Lock()
    mdict = mgr.dict()
    if cfg.RPN.ENABLED:
        mdict['exit_proc'] = 0
        mdict['total_gt_bbox'] = 0
        mdict['total_cnt'] = 0
        mdict['total_rpn_iou'] = 0
        for i in range(len(thresh_list)):
            mdict['total_recalled_bbox_list_{}'.format(i)] = 0

        p_list = []
        for i in range(METRIC_PROC_NUM):
            p_list.append(
                multiprocessing.Process(target=rpn_metric,
                                        args=(queue, mdict, lock, thresh_list,
                                              args.save_rpn_feature,
                                              kitti_feature_dir,
                                              seg_output_dir, kitti_output_dir,
                                              kitti_rcnn_reader, cfg.CLASSES)))
            p_list[-1].start()

    if cfg.RCNN.ENABLED:
        for i in range(len(thresh_list)):
            mdict['total_recalled_bbox_list_{}'.format(i)] = 0
            mdict['total_roi_recalled_bbox_list_{}'.format(i)] = 0
        mdict['exit_proc'] = 0
        mdict['total_cls_acc'] = 0
        mdict['total_cls_acc_refined'] = 0
        mdict['total_det_num'] = 0
        mdict['total_gt_bbox'] = 0
        p_list = []
        for i in range(METRIC_PROC_NUM):
            p_list.append(
                multiprocessing.Process(
                    target=rcnn_metric,
                    args=(queue, mdict, lock, thresh_list, kitti_rcnn_reader,
                          roi_output_dir, refine_output_dir, final_output_dir,
                          args.save_result)))
            p_list[-1].start()

    try:
        eval_loader.start()
        eval_iter = 0
        start_time = time.time()

        cur_time = time.time()
        while True:
            eval_outs = exe.run(eval_compile_prog,
                                fetch_list=eval_values,
                                return_numpy=False)
            rets_dict = {
                k: (np.array(v), v.recursive_sequence_lengths())
                for k, v in zip(eval_keys, eval_outs)
            }
            run_time = time.time() - cur_time
            cur_time = time.time()
            queue.put(rets_dict)
            eval_iter += 1

            logger.info("[EVAL] iter {}, time: {:.2f}".format(
                eval_iter, run_time))

    except fluid.core.EOFException:
        # terminate metric process
        for i in range(METRIC_PROC_NUM):
            queue.put(None)
        while mdict['exit_proc'] < METRIC_PROC_NUM:
            time.sleep(1)
        for p in p_list:
            if p.is_alive():
                p.join()

        end_time = time.time()
        logger.info(
            "[EVAL] total {} iter finished, average time: {:.2f}".format(
                eval_iter, (end_time - start_time) / float(eval_iter)))

        if cfg.RPN.ENABLED:
            avg_rpn_iou = mdict['total_rpn_iou'] / max(len(kitti_rcnn_reader),
                                                       1.)
            logger.info("average rpn iou: {:.3f}".format(avg_rpn_iou))
            total_gt_bbox = float(max(mdict['total_gt_bbox'], 1.0))
            for idx, thresh in enumerate(thresh_list):
                recall = mdict['total_recalled_bbox_list_{}'.format(
                    idx)] / total_gt_bbox
                logger.info(
                    "total bbox recall(thresh={:.3f}): {} / {} = {:.3f}".
                    format(thresh,
                           mdict['total_recalled_bbox_list_{}'.format(idx)],
                           mdict['total_gt_bbox'], recall))

        if cfg.RCNN.ENABLED:
            cnt = float(max(eval_iter, 1.0))
            avg_cls_acc = mdict['total_cls_acc'] / cnt
            avg_cls_acc_refined = mdict['total_cls_acc_refined'] / cnt
            avg_det_num = mdict['total_det_num'] / cnt

            logger.info("avg_cls_acc: {}".format(avg_cls_acc))
            logger.info("avg_cls_acc_refined: {}".format(avg_cls_acc_refined))
            logger.info("avg_det_num: {}".format(avg_det_num))

            total_gt_bbox = float(max(mdict['total_gt_bbox'], 1.0))
            for idx, thresh in enumerate(thresh_list):
                cur_roi_recall = mdict['total_roi_recalled_bbox_list_{}'.
                                       format(idx)] / total_gt_bbox
                logger.info(
                    'total roi bbox recall(thresh=%.3f): %d / %d = %f' %
                    (thresh,
                     mdict['total_roi_recalled_bbox_list_{}'.format(idx)],
                     total_gt_bbox, cur_roi_recall))

            for idx, thresh in enumerate(thresh_list):
                cur_recall = mdict['total_recalled_bbox_list_{}'.format(
                    idx)] / total_gt_bbox
                logger.info(
                    'total bbox recall(thresh=%.2f) %d / %.2f = %.4f' %
                    (thresh, mdict['total_recalled_bbox_list_{}'.format(idx)],
                     total_gt_bbox, cur_recall))

            split_file = os.path.join('./data/KITTI', 'ImageSets', 'val.txt')
            image_idx_list = [x.strip() for x in open(split_file).readlines()]
            for k in range(image_idx_list.__len__()):
                cur_file = os.path.join(final_output_dir,
                                        '%s.txt' % image_idx_list[k])
                if not os.path.exists(cur_file):
                    with open(cur_file, 'w') as temp_f:
                        pass

            if sys.version_info >= (3, 6):
                label_dir = os.path.join('./data/KITTI/object/training',
                                         'label_2')
                split_file = os.path.join('./data/KITTI', 'ImageSets',
                                          'val.txt')
                final_output_dir = os.path.join("./result_dir", 'final_result',
                                                'data')
                name_to_class = {'Car': 0, 'Pedestrian': 1, 'Cyclist': 2}

                from tools.kitti_object_eval_python.evaluate import evaluate as kitti_evaluate
                ap_result_str, ap_dict = kitti_evaluate(
                    label_dir,
                    final_output_dir,
                    label_split_file=split_file,
                    current_class=name_to_class["Car"])

                logger.info("KITTI evaluate: {}, {}".format(
                    ap_result_str, ap_dict))

            else:
                logger.info(
                    "KITTI mAP only support python version >= 3.6, users can "
                    "run 'python3 tools/kitti_eval.py' to evaluate KITTI mAP.")

    finally:
        eval_loader.reset()
Example 6
    def run_trainer(self, args):
        self.lr = args.lr
        if args.nccl2_reduce_layer_local_run:
            test_program, avg_cost, train_reader, test_reader, batch_acc, predict = \
                self.get_model(batch_size=args.batch_size, single_device=True)
        elif args.use_dgc:
            test_program, avg_cost, train_reader, test_reader, batch_acc, predict = \
                self.get_model(batch_size=args.batch_size, use_dgc=args.use_dgc)
        else:
            test_program, avg_cost, train_reader, test_reader, batch_acc, predict = \
                self.get_model(batch_size=args.batch_size)

        if args.update_method == "pserver":
            print_to_err(
                type(self).__name__,
                "begin to run transpile on trainer with pserver mode")
            t = self.get_transpiler(
                trainer_id=args.trainer_id,
                main_program=fluid.default_main_program(),
                pserver_endpoints=args.endpoints,
                trainers=args.trainers,
                sync_mode=args.sync_mode,
                dc_asgd=args.dc_asgd,
                hogwild_mode=args.hogwild)

            trainer_prog = t.get_trainer_program()
            print_to_err(
                type(self).__name__,
                "get trainer program done with pserver mode.")
        elif args.update_method == "nccl2" or args.update_method == "nccl2_reduce_layer":
            # transpile for nccl2
            config = fluid.DistributeTranspilerConfig()
            config.mode = "nccl2"
            config.nccl_comm_num = args.nccl_comm_num
            if args.use_hallreduce:
                config.use_hierarchical_allreduce = True
                config.hierarchical_allreduce_inter_nranks = args.hallreduce_inter_nranks
            print_to_err(
                type(self).__name__,
                "begin to run transpile on trainer with nccl2 mode")
            nccl2_t = fluid.DistributeTranspiler(config=config)
            nccl2_t.transpile(
                args.trainer_id,
                program=fluid.default_main_program(),
                startup_program=fluid.default_startup_program(),
                trainers=args.endpoints,
                current_endpoint=args.current_endpoint)
            print_to_err(
                type(self).__name__,
                "get trainer program done. with nccl2 mode")
            trainer_prog = fluid.default_main_program()
        else:
            print_to_err(
                type(self).__name__,
                "do nothing about main program, just use it")
            trainer_prog = fluid.default_main_program()
            print_to_err(type(self).__name__, "use main program done.")

        if args.use_cuda:
            device_id = int(os.getenv("FLAGS_selected_gpus", "0"))
            place = fluid.CUDAPlace(device_id)
        else:
            place = fluid.CPUPlace()

        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        print_to_err(type(self).__name__, "run worker startup program done.")

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_threads = 1

        build_stra = fluid.BuildStrategy()
        # FIXME force disable enable_inplace and memory_optimize
        build_stra.enable_inplace = False
        build_stra.memory_optimize = False

        if args.hogwild:
            build_stra.async_mode = True

        if args.enable_backward_deps:
            build_stra.enable_backward_optimizer_op_deps = True

        if args.use_reduce:
            build_stra.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
        else:
            build_stra.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.AllReduce

        pass_builder = None
        if args.batch_merge_repeat > 1:
            pass_builder = build_stra._finalize_strategy_and_create_passes()
            mypass = pass_builder.insert_pass(0, "multi_batch_merge_pass")
            mypass.set("num_repeats", args.batch_merge_repeat)

        if args.update_method == "nccl2" or args.update_method == "nccl2_reduce_layer":
            build_stra.num_trainers = len(args.endpoints.split(","))
            build_stra.trainer_id = args.trainer_id
        else:
            # case args.update_method == "nccl2_reduce_layer":
            build_stra.num_trainers = 1
            build_stra.trainer_id = 0

        print_to_err(type(self).__name__, "begin to compile with data parallel")
        binary = compiler.CompiledProgram(trainer_prog).with_data_parallel(
            loss_name=avg_cost.name,
            build_strategy=build_stra,
            exec_strategy=exec_strategy)
        print_to_err(type(self).__name__, "program compiled with data parallel")

        feed_var_list = [
            var for var in trainer_prog.global_block().vars.values()
            if var.is_data
        ]

        feeder = fluid.DataFeeder(feed_var_list, place)
        reader_generator = train_reader()

        def get_data():
            origin_batch = next(reader_generator)
            if args.update_method != "local" and args.use_reader_alloc:
                new_batch = []
                for offset, item in enumerate(origin_batch):
                    if offset % 2 == args.trainer_id:
                        new_batch.append(item)
                return new_batch
            else:
                return origin_batch

        print_to_err(type(self).__name__, "begin to train on trainer")
        out_losses = []
        for i in six.moves.xrange(RUN_STEP):
            loss, = exe.run(binary,
                            fetch_list=[avg_cost.name],
                            feed=feeder.feed(get_data()))
            out_losses.append(loss[0])
            print_to_err(type(self).__name__, "run step %d finished" % i)
        print_to_err(type(self).__name__, "trainer run finished")

        print_to_out(out_losses)
Example 7
def compress(args):
    ############################################################################################################
    # 1. quantization configs
    ############################################################################################################
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops whose name_scope is in the not_quant_pattern list will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops whose type is in quantize_op_types will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8' or 'int8'; default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization, default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    train_reader = None
    test_reader = None
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    train_prog = fluid.default_main_program()
    val_program = fluid.default_main_program().clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    ############################################################################################################
    # 2. quantization transform programs (training aware)
    #    Insert quantization transforms into the graph before training and testing.
    #    According to the weight and activation quantization types, fake quantize
    #    and fake dequantize operators will be inserted into the graph.
    ############################################################################################################
    val_program = quant_aware(val_program,
                              place,
                              quant_config,
                              scope=None,
                              for_test=True)
    compiled_train_prog = quant_aware(train_prog,
                                      place,
                                      quant_config,
                                      scope=None,
                                      for_test=False)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(args.pretrained_model,
                                               var.name))

        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.batch(train_reader,
                                batch_size=args.batch_size,
                                drop_last=True)

    train_feeder = fluid.DataFeeder([image, label], place)
    val_feeder = fluid.DataFeeder([image, label], place, program=val_program)

    def test(epoch, program):
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=train_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, compiled_train_prog):

        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                compiled_train_prog,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))
            batch_id += 1

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.sync_batch_norm = False
    exec_strategy = fluid.ExecutionStrategy()
    compiled_train_prog = compiled_train_prog.with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    ############################################################################################################
    # train loop
    ############################################################################################################
    best_acc1 = 0.0
    best_epoch = 0
    for i in range(args.num_epochs):
        train(i, compiled_train_prog)
        acc1 = test(i, val_program)
        fluid.io.save_persistables(exe,
                                   dirname=os.path.join(
                                       args.checkpoint_dir, str(i)),
                                   main_program=val_program)
        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            fluid.io.save_persistables(exe,
                                       dirname=os.path.join(
                                           args.checkpoint_dir, 'best_model'),
                                       main_program=val_program)

    fluid.io.load_persistables(exe,
                               dirname=os.path.join(args.checkpoint_dir,
                                                    'best_model'),
                               main_program=val_program)
    ############################################################################################################
    # 3. Freeze the graph after training by adjusting the quantize
    #    operators' order for the inference.
    #    The dtype of float_program's weights is float32, but in int8 range.
    ############################################################################################################
    float_program, int8_program = convert(val_program, place, quant_config, \
                                                        scope=None, \
                                                        save_int8=True)
    print("eval best_model after convert")
    final_acc1 = test(best_epoch, float_program)
    ############################################################################################################
    # 4. Save inference model
    ############################################################################################################
    model_path = os.path.join(
        quantization_model_save_dir, args.model,
        'act_' + quant_config['activation_quantize_type'] + '_w_' +
        quant_config['weight_quantize_type'])
    float_path = os.path.join(model_path, 'float')
    int8_path = os.path.join(model_path, 'int8')
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    fluid.io.save_inference_model(dirname=float_path,
                                  feeded_var_names=[image.name],
                                  target_vars=[out],
                                  executor=exe,
                                  main_program=float_program,
                                  model_filename=float_path + '/model',
                                  params_filename=float_path + '/params')

    fluid.io.save_inference_model(dirname=int8_path,
                                  feeded_var_names=[image.name],
                                  target_vars=[out],
                                  executor=exe,
                                  main_program=int8_program,
                                  model_filename=int8_path + '/model',
                                  params_filename=int8_path + '/params')
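A short usage sketch (not in the original script): the exported int8 model can be loaded back for inference with a plain executor, using the same model/params filenames written above.

import numpy as np
import paddle.fluid as fluid

# Reload the exported int8 inference model and run a single dummy batch.
infer_exe = fluid.Executor(place)
inference_program, feed_names, fetch_targets = fluid.io.load_inference_model(
    dirname=int8_path,
    executor=infer_exe,
    model_filename=int8_path + '/model',
    params_filename=int8_path + '/params')
fake_image = np.random.random([1] + image_shape).astype('float32')
results = infer_exe.run(inference_program,
                        feed={feed_names[0]: fake_image},
                        fetch_list=fetch_targets)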
Example 8
    def setUp(self):
        self.places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            self.places.append(fluid.CUDAPlace(0))
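A hedged sketch of how such a place list is typically consumed in a Paddle unit test; `check_with_place` is a hypothetical per-place helper the concrete test case would define.

    def test_places(self):
        # Run the same check on every available place: CPU always, CUDA if compiled in.
        for place in self.places:
            self.check_with_place(place)  # hypothetical per-place check helper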
Example 9
def main(args):
    hidden_size = args.hidden_size
    neg_num = args.neg_num
    epoch = args.epoch
    p = args.p
    q = args.q
    save_path = args.save_path
    batch_size = args.batch_size
    walk_len = args.walk_len
    win_size = args.win_size

    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    dataset = load(args.dataset)

    if args.offline_learning:
        log.info("Start random walk on disk...")
        walk_save_path = os.path.join(save_path, "walks")
        if not os.path.isdir(walk_save_path):
            os.makedirs(walk_save_path)
        pool = Pool(args.processes)
        args_list = [(x, dataset.graph, walk_save_path, 1, batch_size,
                      walk_len, p, q, np.random.randint(2**32))
                     for x in range(epoch)]
        pool.map(process, args_list)
        filelist = glob.glob(os.path.join(walk_save_path, "*"))
        log.info("Random walk on disk Done.")
    else:
        filelist = None

    train_steps = int(dataset.graph.num_nodes / batch_size) * epoch

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    node2vec_prog = fluid.Program()
    startup_prog = fluid.Program()

    with fluid.program_guard(node2vec_prog, startup_prog):
        with fluid.unique_name.guard():
            node2vec_pyreader, node2vec_loss = node2vec_model(
                dataset.graph, hidden_size=hidden_size, neg_num=neg_num)
            lr = l.polynomial_decay(0.025, train_steps, 0.0001)
            adam = fluid.optimizer.Adam(lr)
            adam.minimize(node2vec_loss)

    node2vec_pyreader.decorate_tensor_provider(
        node2vec_generator(dataset.graph,
                           batch_size=batch_size,
                           walk_len=walk_len,
                           win_size=win_size,
                           epoch=epoch,
                           neg_num=neg_num,
                           p=p,
                           q=q,
                           filelist=filelist))

    node2vec_pyreader.start()

    exe = fluid.Executor(place)
    exe.run(startup_prog)

    prev_time = time.time()
    step = 0

    while 1:
        try:
            node2vec_loss_val = exe.run(node2vec_prog,
                                        fetch_list=[node2vec_loss],
                                        return_numpy=True)[0]
            cur_time = time.time()
            use_time = cur_time - prev_time
            prev_time = cur_time
            step += 1
            log.info("Step %d " % step +
                     "Node2vec Loss: %f " % node2vec_loss_val +
                     " %f s/step." % use_time)
        except fluid.core.EOFException:
            node2vec_pyreader.reset()
            break

    fluid.io.save_persistables(exe, os.path.join(save_path, "paddle_model"),
                               node2vec_prog)
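A brief usage sketch (not in the original script): the persisted node2vec model can later be reloaded into the same program before resuming training or exporting the learned embeddings.

# Reload the saved persistable variables into the node2vec program.
fluid.io.load_persistables(exe, os.path.join(save_path, "paddle_model"),
                           main_program=node2vec_prog)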
Example 10
def eval(vocab, infer_progs, dev_count, logger, args):
    infer_prog, infer_startup_prog, infer_model = infer_progs
    feed_order = infer_model.feed_order
    loss = infer_model.loss

    # prepare device
    place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace()
    exe = Executor(place)
    if not args.use_gpu:
        place = fluid.CPUPlace()
        import multiprocessing
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    else:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()

    if args.para_print:
        with open("infer_program.desc", 'w') as f:
            print(str(infer_prog), file=f)

    # TODO: infer_startup_prog is not used; the training scope is used implicitly
    # Use test set as validation each pass
    total_loss = 0.0
    total_cnt = 0
    n_batch_cnt = 0
    n_batch_loss = 0.0
    val_feed_list = [
        infer_prog.global_block().var(var_name) for var_name in feed_order
    ]
    val_feeder = fluid.DataFeeder(val_feed_list, place)
    dev_data = data.BidirectionalLMDataset(
        args.test_path, vocab, test=True, shuffle_on_load=False)
    dev_data_iter = lambda: dev_data.iter_batches(args.batch_size * dev_count, args.num_steps)
    dev_reader = read_multiple(dev_data_iter, args.batch_size, dev_count)

    last_hidden_values = np.zeros(
        (dev_count, args.num_layers * 2 * args.batch_size * args.embed_size),
        dtype='float32')
    last_cell_values = np.zeros(
        (dev_count, args.num_layers * 2 * args.batch_size * args.hidden_size),
        dtype='float32')
    for batch_id, batch_list in enumerate(dev_reader(), 1):
        feed_data = batch_reader(batch_list, args)
        feed = list(val_feeder.feed_parallel(feed_data, dev_count))
        for i in range(dev_count):
            init_hidden_tensor = fluid.core.LoDTensor()
            if args.use_gpu:
                placex = fluid.CUDAPlace(i)
            else:
                placex = fluid.CPUPlace()
            init_hidden_tensor.set(last_hidden_values[i], placex)
            init_cell_tensor = fluid.core.LoDTensor()
            init_cell_tensor.set(last_cell_values[i], placex)

            feed[i]['init_hiddens'] = init_hidden_tensor
            feed[i]['init_cells'] = init_cell_tensor

        # TODO: ParallelExecutor (pe) has a bug in release 1.3, so run per device with exe.run below
        #import pdb; pdb.set_trace()
        last_hidden_values = []
        last_cell_values = []
        for i in range(dev_count):
            val_fetch_outs = exe.run(
                program=infer_prog,
                feed=feed[i],
                fetch_list=[
                    infer_model.loss.name, infer_model.last_hidden.name,
                    infer_model.last_cell.name
                ],  # + [x[0] for x in names] + [x[0] for x in grad_names],
                return_numpy=False)
            last_hidden_values.append(np.array(val_fetch_outs[1]))
            last_cell_values.append(np.array(val_fetch_outs[2]))
            total_loss += np.array(val_fetch_outs[0]).sum()

            n_batch_cnt += len(np.array(val_fetch_outs[0]))
            total_cnt += len(np.array(val_fetch_outs[0]))
            n_batch_loss += np.array(val_fetch_outs[0]).sum()

        last_hidden_values = np.array(last_hidden_values).reshape((
            dev_count, args.num_layers * 2 * args.batch_size * args.embed_size))
        last_cell_values = np.array(last_cell_values).reshape(
            (dev_count,
             args.num_layers * 2 * args.batch_size * args.hidden_size))

        log_every_n_batch = args.log_interval
        if log_every_n_batch > 0 and batch_id % log_every_n_batch == 0:
            logger.info('Average dev loss from batch {} to {} is {}'.format(
                batch_id - log_every_n_batch + 1, batch_id, "%.10f" % (
                    n_batch_loss / n_batch_cnt)))
            n_batch_loss = 0.0
            n_batch_cnt = 0
        batch_offset = 0

    ppl = np.exp(total_loss / total_cnt)
    return ppl
Example 11
def train_loop(args,
               logger,
               vocab,
               train_progs,
               infer_progs,
               optimizer,
               nccl2_num_trainers=1,
               nccl2_trainer_id=0,
               worker_endpoints=None):
    train_prog, train_startup_prog, train_model = train_progs
    infer_prog, infer_startup_prog, infer_model = infer_progs

    # prepare device
    place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace()
    exe = Executor(place)
    if not args.use_gpu:
        place = fluid.CPUPlace()
        import multiprocessing
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    else:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()

    if args.load_dir:
        logger.info('load pretrained checkpoints from {}'.format(args.load_dir))
        # TODO: why is it not necessary to run train_startup_prog before load_persistables?
        fluid.io.load_persistables(exe, args.load_dir, main_program=train_prog)
    elif args.load_pretraning_params:
        logger.info('load pretrained params from {}'.format(args.load_pretraning_params))
        exe.run(train_startup_prog)
        init_pretraining_params(exe, args.load_pretraning_params, main_program=train_prog)
    else:
        exe.run(train_startup_prog)

    # prepare data
    feed_list = [
        train_prog.global_block().var(var_name)
        for var_name in train_model.feed_order
    ]
    feeder = fluid.DataFeeder(feed_list, place)

    logger.info('Training the model...')
    exe_strategy = fluid.parallel_executor.ExecutionStrategy()
    if args.para_print:
        exe_strategy.num_threads = 1
        debug_init(train_prog, train_model.grad_vars,
                   train_model.grad_vars_name)
        with open("program.desc", 'w') as f:
            print(str(train_prog), file=f)
    parallel_executor = fluid.ParallelExecutor(
        loss_name=train_model.loss.name,
        main_program=train_prog,
        use_cuda=bool(args.use_gpu),
        exec_strategy=exe_strategy,
        num_trainers=nccl2_num_trainers,
        trainer_id=nccl2_trainer_id)
    load_params(train_prog, parallel_executor, place, logger, args)
    print_para(train_prog, parallel_executor, logger, optimizer, args)

    logger.info("begin to load data")
    train_data = data.BidirectionalLMDataset(
        args.train_path,
        vocab,
        test=(not args.shuffle),
        shuffle_on_load=args.shuffle)
    logger.info("finished load vocab")

    # get train epoch size
    log_interval = args.log_interval
    total_time = 0.0
    batch_size = args.batch_size
    hidden_size = args.hidden_size
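    # The two arrays below hold, for every (batch, step) position, the candidate
    # ids and their probabilities; here each position simply enumerates candidates
    # 0..n_negative_samples_batch with probability 1.0, presumably consumed by the
    # model's sampled-softmax / candidate-sampling loss.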
    custom_samples_array = np.zeros(
        (batch_size, args.num_steps, args.n_negative_samples_batch + 1),
        dtype='int64')
    custom_probabilities_array = np.zeros(
        (batch_size, args.num_steps, args.n_negative_samples_batch + 1),
        dtype='float32')
    for i in range(batch_size):
        for j in range(0, args.num_steps):
            for k in range(0, args.n_negative_samples_batch + 1):
                custom_samples_array[i][j][k] = k
                custom_probabilities_array[i][j][k] = 1.0

    for epoch_id in range(args.max_epoch):
        start_time = time.time()
        logger.info("epoch id {}".format(epoch_id))
        train_data_iter = lambda: train_data.iter_batches(batch_size * dev_count, args.num_steps)
        train_reader = read_multiple(train_data_iter, batch_size, dev_count)

        total_num = 0
        n_batch_loss = 0.0
        n_batch_cnt = 0
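        # Per-device LSTM states are carried across batches within an epoch: each
        # batch is fed the previous batch's last hidden/cell values (see the
        # init_hiddens/init_cells feed below), i.e. stateful, truncated-BPTT-style
        # training.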
        last_hidden_values = np.zeros(
            (dev_count, args.num_layers * 2 * batch_size * args.embed_size),
            dtype='float32')
        last_cell_values = np.zeros(
            (dev_count, args.num_layers * 2 * batch_size * hidden_size),
            dtype='float32')

        begin_time = time.time()
        for batch_id, batch_list in enumerate(train_reader(), 1):
            feed_data = batch_reader(batch_list, args)
            feed = list(feeder.feed_parallel(feed_data, dev_count))
            for i in range(dev_count):
                init_hidden_tensor = fluid.core.LoDTensor()
                if args.use_gpu:
                    placex = fluid.CUDAPlace(i)
                else:
                    placex = fluid.CPUPlace()
                init_hidden_tensor.set(last_hidden_values[i], placex)
                init_cell_tensor = fluid.core.LoDTensor()
                init_cell_tensor.set(last_cell_values[i], placex)

                feed[i]['init_hiddens'] = init_hidden_tensor
                feed[i]['init_cells'] = init_cell_tensor

            fetch_outs = parallel_executor.run(
                feed=feed,
                fetch_list=[
                    train_model.loss.name, train_model.last_hidden.name,
                    train_model.last_cell.name
                ],  # + [x[0] for x in names] + [x[0] for x in grad_names],
                return_numpy=False)
            cost_train = np.array(fetch_outs[0]).mean()
            #import pdb; pdb.set_trace()
            last_hidden_values = np.array(fetch_outs[1])
            last_hidden_values = last_hidden_values.reshape(
                (dev_count, args.num_layers * 2 * batch_size * args.embed_size))
            last_cell_values = np.array(fetch_outs[2])
            last_cell_values = last_cell_values.reshape((
                dev_count, args.num_layers * 2 * batch_size * args.hidden_size))

            #vars = fetch_outs[2:2+len(names)]
            #grad_vars = fetch_outs[2+len(names):]

            total_num += args.batch_size * dev_count
            n_batch_loss += np.array(fetch_outs[0]).sum()
            #logger.info("n_batch_loss from {} to {} is {}, {} ".format(
            #    batch_id - log_interval, batch_id, n_batch_loss,
            #    np.array(fetch_outs[0]).sum()))
            n_batch_cnt += len(np.array(fetch_outs[0]))

            if batch_id > 0 and batch_id % log_interval == 0:
                #vars_print(logger, args, vars=(vars, names), grad_vars=(grad_vars, grad_names))
                print_para(train_prog, parallel_executor, logger, optimizer,
                           args)
                smoothed_ppl = np.exp(n_batch_loss / n_batch_cnt)
                ppl = np.exp(
                    np.array(fetch_outs[0]).sum() /
                    len(np.array(fetch_outs[0])))
                used_time = time.time() - begin_time
                speed = log_interval / used_time
                logger.info(
                    "[train] epoch:{}, step:{}, loss:{:.3f}, ppl:{:.3f}, smoothed_ppl:{:.3f}, speed:{:.3f}".
                    format(epoch_id, batch_id, n_batch_loss / n_batch_cnt, ppl,
                           smoothed_ppl, speed))
                n_batch_loss = 0.0
                n_batch_cnt = 0
                begin_time = time.time()
            if batch_id > 0 and batch_id % args.dev_interval == 0:
                valid_ppl = eval(vocab, infer_progs, dev_count, logger, args)
                logger.info("valid ppl {}".format(valid_ppl))
            if batch_id > 0 and batch_id % args.save_interval == 0:
                model_path = os.path.join(args.para_save_dir,
                                          str(batch_id + epoch_id))
                if not os.path.isdir(model_path):
                    os.makedirs(model_path)
                fluid.io.save_persistables(
                    executor=exe, dirname=model_path, main_program=train_prog)
            if args.detail and batch_id > 100:
                exit()

        end_time = time.time()
        total_time += end_time - start_time
        logger.info("train ppl {}".format(ppl))

        if epoch_id == args.max_epoch - 1 and args.enable_ce:
            logger.info("lstm_language_model_duration\t%s" %
                        (total_time / args.max_epoch))
            logger.info("lstm_language_model_loss\t%s" % ppl[0])

        model_path = os.path.join(args.para_save_dir, str(epoch_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(
            executor=exe, dirname=model_path, main_program=train_prog)
        valid_ppl = eval(vocab, infer_progs, dev_count, logger, args)
        logger.info("valid ppl {}".format(valid_ppl))
    test_ppl = eval(vocab, infer_progs, dev_count, logger, args)
    logger.info("test ppl {}".format(test_ppl))
Ejemplo n.º 12
0
# Use the cross-entropy function to measure the error between the true sample label and the predicted probability
cost = fluid.layers.cross_entropy(input=predict, label=label)

avg_cost = fluid.layers.mean(cost)
# Compute the classification accuracy
acc = fluid.layers.accuracy(input=predict, label=label)

# Define the optimizer
# Optimize with the Adamax algorithm; the learning rate is set to 0.001
optimizer = fluid.optimizer.AdamaxOptimizer(learning_rate=0.001)
opts = optimizer.minimize(avg_cost)

# Model training
# Create the Executor for training
use_cuda = False
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
test_program = fluid.default_main_program().clone(for_test=True)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())

# Tell the network that the incoming data is split into two parts: image and label
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

# Plot the training curves
all_train_iter = 0
all_train_iters = []
all_train_costs = []
all_train_accs = []


def draw_train_process(title, iters, costs, accs, label_cost, label_acc):
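    # The original example is truncated here; the body below is only a plausible
    # sketch (assuming matplotlib) of how such a plotting helper is usually
    # written, not the original implementation.
    import matplotlib.pyplot as plt
    plt.title(title, fontsize=24)
    plt.xlabel("iter", fontsize=14)
    plt.plot(iters, costs, color='red', label=label_cost)
    plt.plot(iters, accs, color='green', label=label_acc)
    plt.legend()
    plt.grid()
    plt.show()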
Ejemplo n.º 13
0
def train():
    learning_rate = cfg.learning_rate
    image_shape = [3, 512, 512]

    if cfg.enable_ce:
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        import random
        random.seed(0)
        np.random.seed(0)

    devices_num = get_device_num()
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    use_random = True
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            model = model_builder.EAST(
                add_conv_body_func=resnet.ResNet(),
                use_random=use_random)
            model.build_model(image_shape)
            losses, keys = model.loss()
            loss = losses[0]
            fetch_list = losses

            boundaries = cfg.lr_steps
            gamma = cfg.lr_gamma
            step_num = len(cfg.lr_steps)
            values = [learning_rate * (gamma**i) for i in range(step_num + 1)]
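            # e.g. with learning_rate=0.001, gamma=0.1 and two entries in lr_steps
            # this yields values = [0.001, 0.0001, 0.00001] (illustrative numbers only).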

            lr = exponential_with_warmup_decay(
                learning_rate=learning_rate,
                boundaries=boundaries,
                values=values,
                warmup_iter=cfg.warm_up_iter,
                warmup_factor=cfg.warm_up_factor)
            optimizer = fluid.optimizer.AdamOptimizer(learning_rate=lr, regularization=fluid.regularizer.L2Decay(cfg.weight_decay))
            optimizer.minimize(loss)
            fetch_list = fetch_list + [lr]

            for var in fetch_list:
                var.persistable = True

    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    build_strategy = fluid.BuildStrategy()
    build_strategy.fuse_all_optimizer_ops = False
    build_strategy.fuse_elewise_add_act_ops = True
    build_strategy.sync_batch_norm=True
    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_iteration_per_drop_scope = 1
    exe.run(startup_prog)

    if cfg.pretrained_model:
        def if_exist(var):
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))
        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)
    compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)


    dataset = icdar.ICDAR2015Dataset()
    data_generator = dataset.get_batch(num_workers=24,
                                     input_size=512,
                                     batch_size=14)

    def train_loop():
        start_time = time.time()
        prev_start_time = start_time
        start = start_time
        train_stats = TrainingStats(cfg.log_window, keys)
        #for iter_id, data in enumerate(next(data_generator)):
        for iter_id in range(100000):
            data = next(data_generator)
            #for data in data_list:
            prev_start_time = start_time
            start_time = time.time()
            outs = exe.run(compiled_train_prog, fetch_list=[v.name for v in fetch_list],
                                 feed={"input_images": data[0],
                                       "input_score_maps": data[2],
                                       "input_geo_maps": data[3],
                                       "input_training_masks": data[4]})
            stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
            train_stats.update(stats)
            logs = train_stats.log()
            strs = '{}, batch: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                now_time(), iter_id,
                np.mean(outs[-1]), logs, start_time - prev_start_time)
            if iter_id % 10 == 0:
                print(strs)
            sys.stdout.flush()
            if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                save_model(exe, "model_iter{}".format(iter_id), train_prog)
            if (iter_id + 1) == cfg.max_iter:
                break
        end_time = time.time()
        total_time = end_time - start
        last_loss = np.array(outs[0]).mean()
    train_loop()
Ejemplo n.º 14
0
 def test_case(self):
     self._test_case(fluid.CPUPlace())
     if fluid.is_compiled_with_cuda():
         self._test_case(fluid.CUDAPlace(0))
Ejemplo n.º 15
0
        'models/lm/zh_giga.no_cna_cmn.prune01244.klm',
        "Filepath for language model.")

add_arg('decoding_method', str,
        'ctc_beam_search',
        # 'ctc_greedy',
        "Decoding method. Options: ctc_beam_search, ctc_greedy",
        choices=['ctc_beam_search', 'ctc_greedy'])
add_arg('specgram_type', str,
        'linear',
        "Audio feature type. Options: linear, mfcc.",
        choices=['linear', 'mfcc'])
args = parser.parse_args()

if args.use_gpu:
    place = fluid.CUDAPlace(0)
else:
    place = fluid.CPUPlace()
data_generator = DataGenerator(
    vocab_filepath=args.vocab_path,
    mean_std_filepath=args.mean_std_path,
    augmentation_config='{}',
    specgram_type=args.specgram_type,
    keep_transcription_text=True,
    place=place,
    is_training=False)
# prepare ASR model
ds2_model = DeepSpeech2Model(
    vocab_size=data_generator.vocab_size,
    num_conv_layers=args.num_conv_layers,
    num_rnn_layers=args.num_rnn_layers,
Ejemplo n.º 16
0
def main():
    batch_size = 20

    if args.enable_ce:
        train_reader = paddle.batch(
            paddle.dataset.uci_housing.train(), batch_size=batch_size)
        test_reader = paddle.batch(
            paddle.dataset.uci_housing.test(), batch_size=batch_size)
    else:
        train_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.uci_housing.train(), buf_size=500),
            batch_size=batch_size)
        test_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.uci_housing.test(), buf_size=500),
            batch_size=batch_size)

    # feature vector of length 13
    x = fluid.data(name='x', shape=[None, 13], dtype='float32')
    y = fluid.data(name='y', shape=[None, 1], dtype='float32')

    main_program = fluid.default_main_program()
    startup_program = fluid.default_startup_program()

    if args.enable_ce:
        main_program.random_seed = 90
        startup_program.random_seed = 90

    y_predict = fluid.layers.fc(input=x, size=1, act=None)
    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
    avg_loss = fluid.layers.mean(cost)

    test_program = main_program.clone(for_test=True)

    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
    sgd_optimizer.minimize(avg_loss)

    # can use CPU or GPU
    use_cuda = args.use_gpu
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    # Specify the directory to save the parameters
    params_dirname = "fit_a_line.inference.model"
    num_epochs = args.num_epochs

    # main train loop.
    feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
    exe.run(startup_program)

    train_prompt = "Train cost"
    test_prompt = "Test cost"
    step = 0

    exe_test = fluid.Executor(place)

    for pass_id in range(num_epochs):
        for data_train in train_reader():
            avg_loss_value, = exe.run(
                main_program,
                feed=feeder.feed(data_train),
                fetch_list=[avg_loss])
            if step % 10 == 0:  # record a train cost every 10 batches
                print("%s, Step %d, Cost %f" %
                      (train_prompt, step, avg_loss_value[0]))

            if step % 100 == 0:  # record a test cost every 100 batches
                test_metics = train_test(
                    executor=exe_test,
                    program=test_program,
                    reader=test_reader,
                    fetch_list=[avg_loss],
                    feeder=feeder)
                print("%s, Step %d, Cost %f" %
                      (test_prompt, step, test_metics[0]))
                # If the accuracy is good enough, we can stop the training.
                if test_metics[0] < 10.0:
                    break

            step += 1

            if math.isnan(float(avg_loss_value[0])):
                sys.exit("got NaN loss, training failed.")
        if params_dirname is not None:
            # We can save the trained parameters for the inferences later
            fluid.io.save_inference_model(params_dirname, ['x'], [y_predict],
                                          exe)

        if args.enable_ce and pass_id == args.num_epochs - 1:
            print("kpis\ttrain_cost\t%f" % avg_loss_value[0])
            print("kpis\ttest_cost\t%f" % test_metics[0])

    infer_exe = fluid.Executor(place)
    inference_scope = fluid.core.Scope()

    # infer
    with fluid.scope_guard(inference_scope):
        [inference_program, feed_target_names, fetch_targets
         ] = fluid.io.load_inference_model(params_dirname, infer_exe)
        batch_size = 10

        infer_reader = paddle.batch(
            paddle.dataset.uci_housing.test(), batch_size=batch_size)

        infer_data = next(infer_reader())
        infer_feat = numpy.array(
            [data[0] for data in infer_data]).astype("float32")
        infer_label = numpy.array(
            [data[1] for data in infer_data]).astype("float32")

        assert feed_target_names[0] == 'x'
        results = infer_exe.run(
            inference_program,
            feed={feed_target_names[0]: numpy.array(infer_feat)},
            fetch_list=fetch_targets)

        print("infer results: (House Price)")
        for idx, val in enumerate(results[0]):
            print("%d: %.2f" % (idx, val))

        print("\nground truth:")
        for idx, val in enumerate(infer_label):
            print("%d: %.2f" % (idx, val))

        save_result(results[0], infer_label)
Ejemplo n.º 17
0
    def run_gpu_fleet_api_trainer(self, args):
        assert args.update_method == "nccl2"

        self.lr = args.lr

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_threads = 1

        dist_strategy = DistributedStrategy()
        dist_strategy.exec_strategy = exec_strategy
        dist_strategy.fuse_memory_size = 1  # MB
        dist_strategy.fuse_laryer_size = 1
        if args.use_local_sgd:
            dist_strategy.use_local_sgd = True
        if args.ut4grad_allreduce:
            dist_strategy._ut4grad_allreduce = True

        role = role_maker.PaddleCloudRoleMaker(is_collective=True)
        fleet.init(role)
        print_to_err("gpu_fleet", "fleet.node_num:")
        # "fleet.node_id:", fleet.node_id(),
        # "fleet.trainer_num:", fleet.worker_num())

        test_program, avg_cost, train_reader, test_reader, batch_acc, predict = \
            self.get_model(batch_size=args.batch_size, dist_strategy=dist_strategy)

        trainer_prog = fleet._origin_program
        dist_prog = fleet.main_program

        device_id = int(os.getenv("FLAGS_selected_gpus", "0"))
        place = fluid.CUDAPlace(device_id)

        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        eprint(type(self).__name__, "run worker startup program done.")

        feed_var_list = [
            var for var in trainer_prog.global_block().vars.values()
            if var.is_data
        ]

        feeder = fluid.DataFeeder(feed_var_list, place)
        reader_generator = train_reader()

        def get_data():
            origin_batch = next(reader_generator)
            if args.update_method != "local" and args.use_reader_alloc:
                new_batch = []
                for offset, item in enumerate(origin_batch):
                    if offset % 2 == args.trainer_id:
                        new_batch.append(item)
                return new_batch
            else:
                return origin_batch

        print_to_err(type(self).__name__, "begin to train on trainer")
        out_losses = []
        for i in six.moves.xrange(RUN_STEP):
            loss, = exe.run(dist_prog,
                            fetch_list=[avg_cost.name],
                            feed=feeder.feed(get_data()))
            out_losses.append(loss[0])
            print_to_err(type(self).__name__, "run step %d finished" % i)
        print_to_err(type(self).__name__, "trainer run finished")

        if six.PY2:
            print(pickle.dumps(out_losses))
        else:
            sys.stdout.buffer.write(pickle.dumps(out_losses))
Ejemplo n.º 18
0
def get_places():
    places = [fluid.CPUPlace()]
    if fluid.core.is_compiled_with_cuda():
        places.append(fluid.CUDAPlace(0))
    return places
Ejemplo n.º 19
0
    def run_trainer(self, args):

        seed = 90
        device_id = int(os.getenv("FLAGS_selected_gpus", "0"))
        place = fluid.CUDAPlace(device_id)

        def _get_data(batch):
            if args.update_method != "local":
                new_batch = []
                for offset, item in enumerate(batch):
                    if offset % 2 == args.trainer_id:
                        new_batch.append(item)
                return new_batch
            else:
                return batch

        with fluid.dygraph.guard(place):
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            np.random.seed(seed)
            import random
            random.seed(seed)
            model, train_reader, opt = self.get_model()
            nranks = len(args.endpoints.split(",")) if args.endpoints else 1

            if args.update_method == "nccl2":
                strategy = dygraph.parallel.ParallelStrategy()
                strategy.nranks = nranks
                strategy.local_rank = args.trainer_id
                strategy.trainer_endpoints = args.endpoints.split(",")
                strategy.current_endpoint = args.current_endpoint
                print_to_err(
                    type(self).__name__,
                    "begin to prepare context in dygraph with nccl2")
                dygraph.parallel.prepare_context(strategy)
                model = dygraph.parallel.DataParallel(model, strategy)
                print_to_err(type(self).__name__, "model built in dygraph")
            out_losses = []
            print_to_err(type(self).__name__, "begin to run dygraph training")
            for step_id, data in enumerate(train_reader()):
                data = _get_data(data)
                if step_id == RUN_STEP:
                    break
                loss = self.run_one_loop(model, opt, data)
                if step_id % 10 == 0:
                    print_to_err(
                        type(self).__name__,
                        "loss at step %d: %f" % (step_id, loss.numpy()))
                out_losses.append(loss.numpy())

                # FIXME(Yancey1989): scale the loss inplace
                if args.update_method == "nccl2":
                    loss = model.scale_loss(loss)

                loss.backward()
                if args.update_method == "nccl2":
                    model.apply_collective_grads()

                opt.minimize(loss)
                model.clear_gradients()
        print_to_out(out_losses)
Ejemplo n.º 20
0
    def __next__(self):
        if self._first_batch is not None:
            batch = self._first_batch
            self._first_batch = None
            return batch
        if self._counter >= self._size and self._size > 0:
            if self._auto_reset:
                self.reset()
            raise StopIteration

        # Gather outputs
        outputs = []
        for p in self._pipes:
            with p._check_api_type_scope(types.PipelineAPIType.ITERATOR):
                outputs.append(p.share_outputs())

        for i in range(self._num_gpus):
            dev_id = self._pipes[i].device_id
            # Initialize dict for all output categories
            category_outputs = dict()
            # Segregate outputs into categories
            for j, out in enumerate(outputs[i]):
                category_outputs[self.output_map[j]] = out

            pd_gpu_place = fluid.CUDAPlace(dev_id)
            pd_cpu_place = fluid.CPUPlace()

            category_pd_type = dict()
            category_place = dict()
            category_tensors = dict()
            category_shapes = dict()
            category_lengths = dict()
            for cat, out in category_outputs.items():
                lod = self.normalized_map[cat]
                assert out.is_dense_tensor() or lod > 0, \
                    "non-dense tensor lists must have LoD > 0"

                if lod > 0:
                    # +1 for batch dim
                    seq_len = recursive_length(out, lod + 1)[1:]
                    shape = out.at(0).shape
                    if callable(shape):
                        shape = shape()
                    shape = [sum(seq_len[-1])] + list(shape[lod:])
                    category_shapes[cat] = shape
                    category_lengths[cat] = seq_len
                else:
                    out = out.as_tensor()
                    category_shapes[cat] = out.shape()
                    category_lengths[cat] = []

                category_tensors[cat] = out
                category_pd_type[cat] = to_paddle_type(out)
                if isinstance(out, (TensorGPU, TensorListGPU)):
                    category_place[cat] = pd_gpu_place
                else:
                    category_place[cat] = pd_cpu_place

            if self._data_batches[i] is None:
                pd_tensors = {}
                for cat, lod in self.normalized_map.items():
                    lod_tensor = fluid.core.LoDTensor()
                    lod_tensor._set_dims(category_shapes[cat])
                    pd_tensors[cat] = lod_tensor
                self._data_batches[i] = pd_tensors
            else:
                pd_tensors = self._data_batches[i]

            # Copy data from DALI Tensors to LoDTensors
            for cat, tensor in category_tensors.items():
                if hasattr(tensor, 'shape'):  # could be tensor list
                    assert self._dynamic_shape or \
                        tensor.shape() == pd_tensors[cat].shape(), \
                        ("Shapes do not match: DALI tensor has size {0}, "
                         "but LoDTensor has size {1}".format(
                             tensor.shape(), pd_tensors[cat].shape()))

                lod_tensor = pd_tensors[cat]
                lod_tensor._set_dims(category_shapes[cat])
                seq_len = category_lengths[cat]
                lod_tensor.set_recursive_sequence_lengths(seq_len)
                ptr = lod_tensor._mutable_data(category_place[cat],
                                               category_pd_type[cat])
                feed_ndarray(tensor, ptr)

        for p in self._pipes:
            with p._check_api_type_scope(types.PipelineAPIType.ITERATOR):
                p.release_outputs()
                p.schedule_run()

        self._counter += self._num_gpus * self.batch_size

        if (not self._fill_last_batch) and (self._counter >
                                            self._size) and self._size > 0:
            # First calculate how much data is required to
            # return exactly self._size entries.
            diff = self._num_gpus * self.batch_size - (self._counter -
                                                       self._size)
            # Figure out how many GPUs to grab from.
            num_gpus_to_grab = int(math.ceil(diff / self.batch_size))
            # Figure out how many results to grab from the last GPU
            # (as a fractional GPU batch may be required to bring us
            # right up to self._size).
            mod_diff = diff % self.batch_size
            data_from_last_gpu = mod_diff if mod_diff else self.batch_size

            # Grab the relevant data.
            # 1) Grab everything from the relevant GPUs.
            # 2) Grab the right data from the last GPU.
            # 3) Append data together correctly and return.
            output = self._data_batches[0:num_gpus_to_grab]
            output[-1] = output[-1].copy()
            for cat in self.output_map:
                lod_tensor = output[-1][cat]
                output[-1][cat] = lod_tensor_clip(lod_tensor,
                                                  data_from_last_gpu)
            return output

        return self._data_batches
Ejemplo n.º 21
0
import os
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, Linear
import numpy as np
import Data_Load_Module
import Network


# Call the function that loads the data
train_loader = Data_Load_Module.load_data('train')

    
# When using a GPU machine, set the use_gpu variable to True
use_gpu = True
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()

with fluid.dygraph.guard(place):
    model = Network.MNIST()
    model.train() 
    
    # Configurations for four optimization algorithms; try each one to compare the results
    #optimizer = fluid.optimizer.SGDOptimizer(learning_rate=0.01, parameter_list=model.parameters())
    #optimizer = fluid.optimizer.MomentumOptimizer(learning_rate=0.01, momentum=0.9, parameter_list=model.parameters())
    #optimizer = fluid.optimizer.AdagradOptimizer(learning_rate=0.01, parameter_list=model.parameters())
    optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.01, parameter_list=model.parameters())
    
    EPOCH_NUM = 5
    for epoch_id in range(EPOCH_NUM):
        for batch_id, data in enumerate(train_loader()):
            # Prepare the data
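            # The original snippet stops here; the rest of the loop body below is a
            # plausible sketch only. It assumes the loader yields (images, labels)
            # numpy arrays and that Network.MNIST returns class probabilities.
            image_data, label_data = data
            image = fluid.dygraph.to_variable(image_data)
            label = fluid.dygraph.to_variable(label_data)

            # Forward pass, loss, backward pass, parameter update, gradient reset.
            predict = model(image)
            loss = fluid.layers.cross_entropy(predict, label)
            avg_loss = fluid.layers.mean(loss)
            avg_loss.backward()
            optimizer.minimize(avg_loss)
            model.clear_gradients()

            if batch_id % 100 == 0:
                print("epoch: {}, batch: {}, loss: {}".format(
                    epoch_id, batch_id, avg_loss.numpy()))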
Ejemplo n.º 22
0
def main():
    batch_size = 32
    num_epochs = args.num_epochs
    net_type = args.net

    # Paths for the training checkpoint (ckpt) and inference (inf) models
    param_base_dir = os.path.join(code_base_dir, 'params')
    param_base_dir = os.path.join(param_base_dir, net_type)
    infer_param_path = os.path.join(param_base_dir, "inf")
    ckpt_param_path = os.path.join(param_base_dir, "ckpt")
    print(infer_param_path)
    print(ckpt_param_path)

    train_reader = paddle.batch(
        paddle.reader.shuffle(data_reader(), int(batch_size * 1.5)),
        batch_size)
    test_reader = paddle.batch(
        paddle.reader.shuffle(data_reader(8, 10), int(batch_size * 1.5)),
        batch_size)

    train_program = fluid.Program()
    train_init = fluid.Program()

    with fluid.program_guard(train_program, train_init):
        image = fluid.layers.data(name='image',
                                  shape=[3, 512, 512],
                                  dtype='float32')
        label = fluid.layers.data(name='label',
                                  shape=[1, 512, 512],
                                  dtype='int32')
        train_loader = fluid.io.DataLoader.from_generator(
            feed_list=[image, label], capacity=batch_size)
        test_loader = fluid.io.DataLoader.from_generator(
            feed_list=[image, label], capacity=batch_size)

        if net_type == "unet_simple":
            prediction = unet_simple(image, 2, [512, 512])
        elif net_type == "unet_base":
            prediction = unet_base(image, 2, [512, 512])
        elif net_type == "deeplabv3":
            prediction = deeplabv3p(image, 2)
        else:
            print("错误的网络类型")
            sys.exit(0)

        avg_loss = create_loss(prediction, label, 2)

        miou = mean_iou(prediction, label, 2)

        # 		decay=paddle.fluid.regularizer.L2Decay(0.1)
        # optimizer = fluid.optimizer.SGD(learning_rate=0.0005,regularization=decay)
        # optimizer = fluid.optimizer.DecayedAdagradOptimizer(learning_rate=0.02,regularization=decay)
        # optimizer = fluid.optimizer.RMSProp(learning_rate=0.1,momentum=0.8,centered=True, regularization=decay)

        optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.003)

        # optimizer = fluid.optimizer.MomentumOptimizer(learning_rate=0.006, momentum=0.8,regularization=decay)

        optimizer.minimize(avg_loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(train_init)
    # fluid.io.load_persistables(exe, ckpt_param_path, train_init)

    exe_test = fluid.Executor(place)

    test_program = train_program.clone(for_test=True)

    # train_program=fluid.CompiledProgram(train_program).with_data_parallel(loss_name=avg_loss.name)
    test_program = fluid.CompiledProgram(test_program).with_data_parallel(
        loss_name=avg_loss.name)

    train_loader.set_sample_list_generator(train_reader, places=place)
    test_loader.set_sample_list_generator(test_reader, places=place)

    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    step = 1
    best_miou = 0

    train_prompt = "Train_miou"
    test_prompt = "Test_miou"

    plot_prompt = Ploter(train_prompt, test_prompt)

    for pass_id in range(num_epochs):
        for data_train in train_loader():
            avg_loss_value, miou_value = exe.run(train_program,
                                                 feed=data_train,
                                                 fetch_list=[avg_loss, miou])

            if step % 10 == 0:
                print("\t\tTrain pass %d, Step %d, Cost %f, Miou %f" %
                      (pass_id, step, avg_loss_value[0], miou_value[0]))

            # if step % 10 ==0:
            # plot_prompt.append(train_prompt, step, miou_value[0])
            # plot_prompt.plot()

            eval_miou = 0
            if step % 100 == 0:
                auc_metric = fluid.metrics.Auc("AUC")
                test_losses = []
                test_mious = []
                for _, test_data in enumerate(test_loader()):
                    # print(test_data)
                    # input("pause")

                    _, test_loss, test_miou = exe_test.run(
                        test_program,
                        feed=test_data,
                        fetch_list=[prediction, avg_loss, miou])
                    test_losses.append(test_loss[0])
                    test_mious.append(test_miou[0])

                eval_miou = np.average(np.array(test_mious))
                # plot_prompt.append(test_prompt, step, eval_miou)
                # plot_prompt.plot()

                print("Test loss: %f ,miou: %f" %
                      (np.average(np.array(test_losses)), eval_miou))

            if math.isnan(float(avg_loss_value[0])):
                sys.exit("got NaN loss, training failed.")

            if step % 100 == 0 and param_base_dir is not None and eval_miou > best_miou:
                best_miou = eval_miou
                print("Saving params of step: %d" % step)
                fluid.io.save_inference_model(infer_param_path,
                                              feeded_var_names=['image'],
                                              target_vars=[prediction],
                                              executor=exe,
                                              main_program=train_program)
                fluid.io.save_persistables(exe, ckpt_param_path, train_program)
            step += 1
    print(best_miou)
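create_loss and mean_iou are defined elsewhere in this project; a rough sketch of what a pixel-wise segmentation loss with the signature create_loss(prediction, label, num_classes) might look like (an assumption, not the original implementation):

def create_loss(prediction, label, num_classes):
    # prediction: [N, C, H, W] class scores; label: [N, 1, H, W] integer mask.
    prediction = fluid.layers.transpose(prediction, perm=[0, 2, 3, 1])
    prediction = fluid.layers.reshape(prediction, shape=[-1, num_classes])
    label = fluid.layers.reshape(label, shape=[-1, 1])
    label = fluid.layers.cast(label, dtype='int64')
    loss = fluid.layers.softmax_with_cross_entropy(logits=prediction, label=label)
    return fluid.layers.mean(loss)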
Ejemplo n.º 23
0
 def get_all_places(self):
     p = [fluid.CPUPlace()]
     if fluid.is_compiled_with_cuda():
         p.append(fluid.CUDAPlace(0))
     return p
Ejemplo n.º 24
0
def main(use_cuda):
    """
    Advbox demo which demonstrate how to use advbox.
    """
    TOTAL_NUM = 500
    IMG_NAME = 'img'
    LABEL_NAME = 'label'

    img = fluid.layers.data(name=IMG_NAME, shape=[1, 28, 28], dtype='float32')
    # gradient should flow
    img.stop_gradient = False
    label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')
    logits = mnist_cnn_model(img)
    cost = fluid.layers.cross_entropy(input=logits, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Choose CPU or GPU resources according to the configuration
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)

    BATCH_SIZE = 1

    test_reader = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.mnist.test(), buf_size=128 * 10),
                               batch_size=BATCH_SIZE)

    fluid.io.load_params(exe,
                         "./mnist/",
                         main_program=fluid.default_main_program())

    # advbox demo
    m = PaddleModel(fluid.default_main_program(),
                    IMG_NAME,
                    LABEL_NAME,
                    logits.name,
                    avg_cost.name, (-1, 1),
                    channel_axis=1)
    # Use static FGSM; epsilon is fixed
    attack = FGSM_static(m)
    attack_config = {"epsilon": 0.01}

    # use test data to generate adversarial examples
    total_count = 0
    fooling_count = 0
    for data in test_reader():
        total_count += 1
        adversary = Adversary(data[0][0], data[0][1])

        # FGSM non-targeted attack
        adversary = attack(adversary, **attack_config)

        if adversary.is_successful():
            fooling_count += 1
            #print(
            #    'attack success, original_label=%d, adversarial_label=%d, count=%d'
            #    % (data[0][1], adversary.adversarial_label, total_count))
        else:
            logger.info('attack failed, original_label=%d, count=%d' %
                        (data[0][1], total_count))

        if total_count >= TOTAL_NUM:
            print(
                "[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
                % (fooling_count, total_count,
                   float(fooling_count) / total_count))
            break
    print("fgsm attack done without any defence")

    # Use the label smoothing defence

    # advbox LabelSmoothingDefence demo

    n = PaddleLabelSmoothingDefenceModel(fluid.default_main_program(),
                                         IMG_NAME,
                                         LABEL_NAME,
                                         logits.name,
                                         avg_cost.name, (-1, 1),
                                         channel_axis=1,
                                         preprocess=None,
                                         smoothing=0.1)
    attack_new = FGSM_static(n)
    attack_config = {"epsilon": 0.01}

    total_count = 0
    fooling_count = 0
    for data in test_reader():
        total_count += 1

        # y is not set; it will be fetched automatically
        adversary = Adversary(data[0][0], None)

        # FGSM non-targeted attack
        adversary = attack_new(adversary, **attack_config)

        if adversary.is_successful():
            fooling_count += 1
            logger.info(
                'attack success, original_label=%d, adversarial_label=%d, count=%d'
                % (data[0][1], adversary.adversarial_label, total_count))
        else:
            logger.info('attack failed, original_label=%d, count=%d' %
                        (data[0][1], total_count))

        if total_count >= TOTAL_NUM:
            print(
                "[TEST_DATASET]: fooling_count=%d, total_count=%d, fooling_rate=%f"
                % (fooling_count, total_count,
                   float(fooling_count) / total_count))
            break
    print("fgsm attack done with LabelSmoothingDefence")
Ejemplo n.º 25
0
def train(args):
    config = parse_config(args.config)
    train_config = merge_configs(config, 'train', vars(args))
    valid_config = merge_configs(config, 'valid', vars(args))
    print_configs(train_config, 'Train')

    use_data_parallel = False
    trainer_count = fluid.dygraph.parallel.Env().nranks
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id) \
        if use_data_parallel else fluid.CUDAPlace(0)

    with fluid.dygraph.guard(place):
        if use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()

        video_model = TSM_ResNet("TSM", train_config)

        optimizer = create_optimizer(train_config.TRAIN,
                                     video_model.parameters())
        if use_data_parallel:
            video_model = fluid.dygraph.parallel.DataParallel(
                video_model, strategy)

        bs_denominator = 1
        if args.use_gpu:
            # check number of GPUs
            gpus = os.getenv("CUDA_VISIBLE_DEVICES", "")
            if gpus == "":
                pass
            else:
                gpus = gpus.split(",")
                num_gpus = len(gpus)
                assert num_gpus == train_config.TRAIN.num_gpus, \
                       "num_gpus({}) set by CUDA_VISIBLE_DEVICES " \
                       "should be the same as that " \
                       "set in {}({})".format(
                       num_gpus, args.config, train_config.TRAIN.num_gpus)
            bs_denominator = train_config.TRAIN.num_gpus

        train_config.TRAIN.batch_size = int(train_config.TRAIN.batch_size /
                                            bs_denominator)

        train_reader = KineticsReader(mode="train", cfg=train_config)

        train_reader = train_reader.create_reader()
        if use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        for epoch in range(train_config.TRAIN.epoch):
            video_model.train()
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0
            for batch_id, data in enumerate(train_reader()):
                x_data = np.array([item[0] for item in data])
                y_data = np.array([item[1] for item in data]).reshape([-1, 1])

                imgs = to_variable(x_data)
                labels = to_variable(y_data)
                labels.stop_gradient = True
                outputs = video_model(imgs)
                loss = fluid.layers.cross_entropy(input=outputs,
                                                  label=labels,
                                                  ignore_index=-1)
                avg_loss = fluid.layers.mean(loss)

                acc_top1 = fluid.layers.accuracy(input=outputs,
                                                 label=labels,
                                                 k=1)
                acc_top5 = fluid.layers.accuracy(input=outputs,
                                                 label=labels,
                                                 k=5)

                if use_data_parallel:
                    avg_loss = video_model.scale_loss(avg_loss)
                    avg_loss.backward()
                    video_model.apply_collective_grads()
                else:
                    avg_loss.backward()
                optimizer.minimize(avg_loss)
                video_model.clear_gradients()

                total_loss += avg_loss.numpy()[0]
                total_acc1 += acc_top1.numpy()[0]
                total_acc5 += acc_top5.numpy()[0]
                total_sample += 1

                print('TRAIN Epoch {}, iter {}, loss = {}, acc1 {}, acc5 {}'.
                      format(epoch, batch_id,
                             avg_loss.numpy()[0],
                             acc_top1.numpy()[0],
                             acc_top5.numpy()[0]))

            print(
                'TRAIN End, Epoch {}, avg_loss= {}, avg_acc1= {}, avg_acc5= {}'
                .format(epoch, total_loss / total_sample,
                        total_acc1 / total_sample, total_acc5 / total_sample))
            video_model.eval()
            val(epoch, video_model, valid_config, args)

        if fluid.dygraph.parallel.Env().local_rank == 0:
            fluid.dygraph.save_dygraph(video_model.state_dict(), "final")
        logger.info('[TRAIN] training finished')
Ejemplo n.º 26
0
 def test_grad(self):
     places = [fluid.CPUPlace()]
     if core.is_compiled_with_cuda():
         places.append(fluid.CUDAPlace(0))
     for p in places:
         self.func(p)
Ejemplo n.º 27
0
def main(args):
    ernie_config = ErnieConfig(args.ernie_config_path)
    ernie_config.print_config()

    reader = ClassifyReader(vocab_path=args.vocab_path,
                            label_map_config=args.label_map_config,
                            max_seq_len=args.max_seq_len,
                            do_lower_case=args.do_lower_case,
                            in_tokens=False,
                            is_inference=True)

    predict_prog = fluid.Program()
    predict_startup = fluid.Program()
    with fluid.program_guard(predict_prog, predict_startup):
        with fluid.unique_name.guard():
            predict_pyreader, probs, feed_target_names = create_model(
                args,
                pyreader_name='predict_reader',
                ernie_config=ernie_config,
                is_classify=True,
                is_prediction=True,
                ernie_version=args.ernie_version)

    predict_prog = predict_prog.clone(for_test=True)

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(predict_startup)

    if args.init_checkpoint:
        init_pretraining_params(exe, args.init_checkpoint, predict_prog)
    else:
        raise ValueError(
            "args 'init_checkpoint' should be set for prediction!")

    # Save the model
    assert args.save_inference_model_path, "args save_inference_model_path should be set for prediction"
    _, ckpt_dir = os.path.split(args.init_checkpoint.rstrip('/'))
    dir_name = ckpt_dir + '_inference_model'
    model_path = os.path.join(args.save_inference_model_path, dir_name)
    print("save inference model to %s" % model_path)
    fluid.io.save_inference_model(model_path,
                                  feed_target_names, [probs],
                                  exe,
                                  main_program=predict_prog)

    # Load the model
    print("load inference model from %s" % model_path)
    infer_program, feed_target_names, probs = fluid.io.load_inference_model(
        model_path, exe)

    src_ids = feed_target_names[0]
    sent_ids = feed_target_names[1]
    pos_ids = feed_target_names[2]
    input_mask = feed_target_names[3]
    if args.ernie_version == "2.0":
        task_ids = feed_target_names[4]

    # Compute similarity
    predict_data_generator = reader.data_generator(input_file=args.predict_set,
                                                   batch_size=args.batch_size,
                                                   epoch=1,
                                                   shuffle=False)

    print("-------------- prediction results --------------")
    np.set_printoptions(precision=4, suppress=True)
    index = 0
    for sample in predict_data_generator():
        src_ids_data = sample[0]
        sent_ids_data = sample[1]
        pos_ids_data = sample[2]
        task_ids_data = sample[3]
        input_mask_data = sample[4]
        if args.ernie_version == "1.0":
            output = exe.run(infer_program,
                             feed={
                                 src_ids: src_ids_data,
                                 sent_ids: sent_ids_data,
                                 pos_ids: pos_ids_data,
                                 input_mask: input_mask_data
                             },
                             fetch_list=probs)
        elif args.ernie_version == "2.0":
            output = exe.run(infer_program,
                             feed={
                                 src_ids: src_ids_data,
                                 sent_ids: sent_ids_data,
                                 pos_ids: pos_ids_data,
                                 task_ids: task_ids_data,
                                 input_mask: input_mask_data
                             },
                             fetch_list=probs)
        else:
            raise ValueError("ernie_version must be 1.0 or 2.0")

        for single_result in output[0]:
            print("example_index:{}\t{}".format(index, single_result))
            index += 1
Ejemplo n.º 28
0
def train(use_cuda):
    classdim = 102
    data_shape = [3, 224, 224]

    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    net = resnet_cifar10(images, 50)

    predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(cost)
    acc = fluid.layers.accuracy(input=predict, label=label)

    # Test program
    test_program = fluid.default_main_program().clone(for_test=True)

    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
    optimizer.minimize(avg_cost)

    BATCH_SIZE = 20
    PASS_NUM = 50

    train_reader = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.flowers.train(), buf_size=128 * 10),
                                batch_size=BATCH_SIZE)

    test_reader = paddle.batch(paddle.dataset.flowers.test(),
                               batch_size=BATCH_SIZE)

    num_trainers = 1
    trainer_id = 0
    # ========================= for nccl2 dist train =================================
    if os.getenv("PADDLE_INIT_TRAINER_ID", None) != None:
        # append gen_nccl_id at the end of startup program
        trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID"))
        print("using trainer_id: ", trainer_id)
        port = os.getenv("PADDLE_INIT_PORT")
        worker_ips = os.getenv("PADDLE_WORKERS")
        worker_endpoints = []
        for ip in worker_ips.split(","):
            print(ip)
            worker_endpoints.append(':'.join([ip, port]))
        num_trainers = len(worker_endpoints)
        current_endpoint = os.getenv("POD_IP") + ":" + port
        worker_endpoints.remove(current_endpoint)

        nccl_id_var = fluid.default_startup_program().global_block(
        ).create_var(name="NCCLID",
                     persistable=True,
                     type=fluid.core.VarDesc.VarType.RAW)
        fluid.default_startup_program().global_block().append_op(
            type="gen_nccl_id",
            inputs={},
            outputs={"NCCLID": nccl_id_var},
            attrs={
                "endpoint": current_endpoint,
                "endpoint_list": worker_endpoints,
                "trainer_id": trainer_id
            })
    # ========================= for nccl2 dist train =================================

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    feeder = fluid.DataFeeder(place=place, feed_list=[images, label])

    def test(pass_id, exe):
        acc_list = []
        avg_loss_list = []
        for tid, test_data in enumerate(test_reader()):
            loss_t, acc_t = exe.run(program=test_program,
                                    feed=feeder.feed(test_data),
                                    fetch_list=[avg_cost, acc])
            if math.isnan(float(loss_t)):
                sys.exit("got NaN loss, training failed.")
            acc_list.append(float(acc_t))
            avg_loss_list.append(float(loss_t))

        acc_value = numpy.array(acc_list).mean()
        avg_loss_value = numpy.array(avg_loss_list).mean()

        print('PassID {0:1}, Test Loss {1:2.2}, Acc {2:2.2}'.format(
            pass_id, float(avg_loss_value), float(acc_value)))

    def train_loop(main_program):
        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        for pass_id in range(PASS_NUM):
            pass_start = time.time()
            num_samples = 0
            for batch_id, data in enumerate(train_reader()):
                exe.run(main_program, feed=feeder.feed(data))
                num_samples += len(data)
            pass_spent = time.time() - pass_start
            print("Pass id %d, train speed %f" %
                  (pass_id, num_samples / pass_spent))
            test(pass_id, exe)

    def train_loop_parallel(main_program):
        startup_exe = fluid.Executor(place)
        startup_exe.run(fluid.default_startup_program())
        exe = fluid.ParallelExecutor(True,
                                     avg_cost.name,
                                     num_threads=1,
                                     allow_op_delay=False,
                                     num_trainers=num_trainers,
                                     trainer_id=trainer_id)

        feeder = fluid.DataFeeder(place=place, feed_list=[images, label])
        for pass_id in range(PASS_NUM):
            num_samples = 0
            start_time = time.time()
            for batch_id, data in enumerate(train_reader()):
                loss, = exe.run([avg_cost.name], feed=feeder.feed(data))
                num_samples += len(data)
                if batch_id % 1 == 0:
                    print("Pass %d, batch %d, loss %s" %
                          (pass_id, batch_id, np.array(loss)))
            pass_elapsed = time.time() - start_time
            print("Pass = %d, Training performance = %f imgs/s\n" %
                  (pass_id, num_samples / pass_elapsed))
            test(pass_id, startup_exe)

    train_loop_parallel(fluid.default_main_program())
Ejemplo n.º 29
0
 def setUp(self):
     self.input = numpy.ones(5).astype("int32")
     self.place = fluid.CUDAPlace(
         0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
     self.init_test_func()
Ejemplo n.º 30
0
def main(use_cuda):
    """
    Advbox demo which demonstrate how to use advbox.
    """
    class_dim = 1000
    IMG_NAME = 'img'
    LABEL_NAME = 'label'
    # Model path
    pretrained_model = "models/resnet_50/115"
    with_memory_optimization = 1
    image_shape = [3,224,224]

    image = fluid.layers.data(name=IMG_NAME, shape=image_shape, dtype='float32')
    # gradient should flow
    image.stop_gradient = False
    label = fluid.layers.data(name=LABEL_NAME, shape=[1], dtype='int64')

    # model definition

    model = resnet.ResNet50()

    out = model.net(input=image, class_dim=class_dim)

    #test_program = fluid.default_main_program().clone(for_test=True)

    if with_memory_optimization:
        fluid.memory_optimize(fluid.default_main_program())

    # Choose CPU or GPU resources according to the configuration
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # Load the model parameters
    if pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)


    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)


    # advbox demo
    m = PaddleModel(
        fluid.default_main_program(),
        IMG_NAME,
        LABEL_NAME,
        out.name,
        avg_cost.name, (-1, 1),
        channel_axis=3)
    attack = FGSM(m)

    attack_config = {"epsilons": 0.3}

    test_data = get_image("cat.jpg")
    # The label corresponding to the cat image
    test_label = 285

    adversary = Adversary(test_data, test_label)

    # FGSM non-targeted attack
    adversary = attack(adversary, **attack_config)

    if adversary.is_successful():

        print(
                'attack success, original_label=%d, adversarial_label=%d'
                % (test_label, adversary.adversarial_label))

    else:
        print('attack failed, original_label=%d, ' % (test_label))

    print("fgsm attack done")