def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
                   batch_acc, args, train_prog, startup_prog, nccl_id_var,
                   num_trainers, trainer_id):
    """Train with a ``fluid.ParallelExecutor`` and optionally test each pass.

    Args:
        avg_loss: Loss variable; its ``name`` is used as the executor's loss
            name and is the only value fetched per step.
        infer_prog: Program passed to ``test`` after each pass.
        optimizer: Not used in this function.
        train_reader: Callable returning an iterator over training batches;
            only consumed when ``args.use_reader_op`` is false.
        test_reader: Reader passed to ``test``.
        batch_acc: Accuracy variable passed to ``test``; testing is skipped
            when it is falsy.
        args: Options object. Reads ``device``, ``use_reader_op``,
            ``use_fake_data``, ``batch_size``, ``gpus``, ``pass_num``,
            ``iterations``, ``skip_batch_num``, ``profile`` and ``no_test``.
        train_prog: Program whose data variables form the feed list.
        startup_prog: Program run once before training; fake-data
            ``fill_constant`` ops are appended to it when requested.
        nccl_id_var: When truthy and ``trainer_id`` is 0, start-up is delayed
            by 30 seconds.
        num_trainers: Forwarded to ``fluid.ParallelExecutor``.
        trainer_id: Forwarded to ``fluid.ParallelExecutor``; also used in the
            profile output path.
    """
    place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
    if not args.use_reader_op:
        # ``values()`` works on both Python 2 and 3; ``itervalues()`` exists
        # only on Python 2.
        feed_var_list = [
            var for var in train_prog.global_block().vars.values()
            if var.is_data
        ]
        feeder = fluid.DataFeeder(feed_var_list, place)

    # generate fake:
    if args.use_fake_data:
        # NOTE(review): feed_var_list is only bound when use_reader_op is
        # false, so use_fake_data together with use_reader_op raises
        # NameError here -- confirm whether that combination should work.
        for var in feed_var_list:
            v = startup_prog.global_block()._clone_variable(var)
            var.persistable = True
            v.persistable = True

            real_shape = list(var.shape)
            # Floor division keeps the per-device batch dimension an integer
            # on Python 3 as well (plain ``/`` would yield a float there).
            real_shape[0] = args.batch_size // args.gpus
            startup_prog.global_block().append_op(outputs={"Out": v},
                                                  type="fill_constant",
                                                  attrs={
                                                      "shape": real_shape,
                                                      "value": 1.0,
                                                      "dtype": var.dtype
                                                  })

    if nccl_id_var and trainer_id == 0:
        #FIXME(wuyi): wait other trainer to start listening
        time.sleep(30)

    startup_exe = fluid.Executor(place)
    startup_exe.run(startup_prog)
    strategy = fluid.ExecutionStrategy()
    strategy.num_threads = 1
    strategy.allow_op_delay = False
    exe = fluid.ParallelExecutor(True,
                                 avg_loss.name,
                                 exec_strategy=strategy,
                                 num_trainers=num_trainers,
                                 trainer_id=trainer_id)

    for pass_id in range(args.pass_num):
        num_samples = 0
        iters = 0
        start_time = time.time()
        if not args.use_reader_op:
            reader_generator = train_reader()
        batch_id = 0
        data = None
        while True:
            if not args.use_reader_op:
                data = next(reader_generator, None)
                # Identity test: ``== None`` would dispatch to the batch
                # object's own ``__eq__``.
                if data is None:
                    break
            if iters == args.iterations:
                break
            # Profile batches 5..10 of the first pass only.
            if args.profile and pass_id == 0 and batch_id == 5:
                profiler.start_profiler("All")
            elif args.profile and pass_id == 0 and batch_id == 10:
                profiler.stop_profiler("total", "/tmp/profile_%d" % trainer_id)

            # Restart the throughput measurement once the warm-up batches
            # have been processed.
            if iters == args.skip_batch_num:
                start_time = time.time()
                num_samples = 0
            if args.use_fake_data or args.use_reader_op:
                try:
                    loss, = exe.run([avg_loss.name])
                except fluid.core.EnforceNotMet:
                    # Presumably raised when the in-graph reader is
                    # exhausted; ends the pass -- TODO confirm.
                    break
            else:
                loss, = exe.run([avg_loss.name], feed=feeder.feed(data))
            if args.use_reader_op:
                num_samples += args.batch_size * args.gpus
            else:
                num_samples += len(data)
            iters += 1
            if batch_id % 1 == 0:
                print("Pass %d, batch %d, loss %s" %
                      (pass_id, batch_id, np.array(loss)))
            batch_id += 1

        print_train_time(start_time, time.time(), num_samples)
        print("current activate thread num: ", threading.active_count())
        if not args.no_test and batch_acc and not args.use_reader_op:
            # we have not implement record io for test
            # skip test when use args.use_reader_op
            test_acc = test(startup_exe, infer_prog, test_reader, feeder,
                            batch_acc)
            print("Pass: %d, Test Accuracy: %f\n" % (pass_id, test_acc))
            print_test_acc(pass_id, test_acc)
Beispiel #2
0
def eval():
    """Run YOLOv3 over the test reader and score the detections with COCO.

    The validation annotation file is chosen from ``cfg.dataset``. Detections
    for every image are converted to COCO result dicts, written to
    ``yolov3_result.json`` and evaluated with ``COCOeval`` in ``'bbox'`` mode.
    The wall-clock time of each fully processed batch and the average over
    those batches are printed.

    Raises:
        ValueError: If ``cfg.dataset`` contains neither ``'2014'`` nor
            ``'2017'``.
    """
    # check if set use_gpu=True in paddlepaddle cpu version
    check_gpu(cfg.use_gpu)

    if '2014' in cfg.dataset:
        test_list = 'annotations/instances_val2014.json'
    elif '2017' in cfg.dataset:
        test_list = 'annotations/instances_val2017.json'
    else:
        # Fail before running inference instead of hitting an unbound
        # ``test_list`` only after every batch has been processed.
        raise ValueError(
            "cfg.dataset must contain '2014' or '2017', got {!r}".format(
                cfg.dataset))

    if cfg.debug:
        if not os.path.exists('output'):
            os.mkdir('output')

    model = YOLOv3(is_train=False)
    model.build_model()
    outputs = model.get_pred()
    place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # yapf: disable
    if cfg.weights:
        def if_exist(var):
            # Only load variables that have a file of the same name on disk.
            return os.path.exists(os.path.join(cfg.weights, var.name))
        fluid.io.load_vars(exe, cfg.weights, predicate=if_exist)
    # yapf: enable

    # you can save inference model by following code
    # fluid.io.save_inference_model("./output/yolov3",
    #                               feeded_var_names=['image', 'im_shape'],
    #                               target_vars=outputs,
    #                               executor=exe)

    input_size = cfg.input_size
    test_reader = reader.test(input_size, 1)
    label_names, label_ids = reader.get_label_infos()
    if cfg.debug:
        print("Load in labels {} with ids {}".format(label_names, label_ids))
    feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    def get_pred_result(boxes, scores, labels, im_id):
        """Convert one image's detections into COCO result dicts."""
        result = []
        for box, score, label in zip(boxes, scores, labels):
            x1, y1, x2, y2 = box
            # Corner coordinates -> [x, y, width, height].
            w = x2 - x1 + 1
            h = y2 - y1 + 1
            bbox = [x1, y1, w, h]

            result.append({
                'image_id': im_id,
                'category_id': label_ids[int(label)],
                'bbox': list(map(float, bbox)),
                'score': float(score)
            })
        return result

    dts_res = []
    fetch_list = [outputs]
    total_time = 0
    # Number of batches whose duration was added to ``total_time``; batches
    # skipped by the ``continue`` below are not timed.
    timed_batches = 0
    for batch_id, batch_data in enumerate(test_reader()):
        start_time = time.time()
        batch_outputs = exe.run(fetch_list=[v.name for v in fetch_list],
                                feed=feeder.feed(batch_data),
                                return_numpy=False,
                                use_program_cache=True)
        lod = batch_outputs[0].lod()[0]
        nmsed_boxes = np.array(batch_outputs[0])
        # Each detection row is [label, score, x1, y1, x2, y2]; any other
        # width is skipped.
        if nmsed_boxes.shape[1] != 6:
            continue
        # ``lod`` holds the row offsets delimiting each image's detections.
        for i in range(len(lod) - 1):
            im_id = batch_data[i][1]
            start = lod[i]
            end = lod[i + 1]
            if start == end:
                continue
            nmsed_box = nmsed_boxes[start:end, :]
            labels = nmsed_box[:, 0]
            scores = nmsed_box[:, 1]
            boxes = nmsed_box[:, 2:6]
            dts_res += get_pred_result(boxes, scores, labels, im_id)

        end_time = time.time()
        print("batch id: {}, time: {}".format(batch_id, end_time - start_time))
        total_time += end_time - start_time
        timed_batches += 1

    with io.open("yolov3_result.json", 'w') as outfile:
        encode_func = unicode if six.PY2 else str
        outfile.write(encode_func(json.dumps(dts_res)))
    print("start evaluate detection result with coco api")
    coco = COCO(os.path.join(cfg.data_dir, test_list))
    cocoDt = coco.loadRes("yolov3_result.json")
    cocoEval = COCOeval(coco, cocoDt, 'bbox')
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()
    print("evaluate done.")

    # Average over the number of timed batches, not the last enumerate index
    # (which is one less than the count and zero for a single batch).
    if timed_batches:
        print("Time per batch: {}".format(total_time / timed_batches))
Beispiel #3
0
def train_parallel_do(args,
                      learning_rate,
                      batch_size,
                      num_passes,
                      init_model=None,
                      pretrained_model=None,
                      model_save_dir='model',
                      parallel=True,
                      use_nccl=True,
                      lr_strategy=None,
                      layers=50):
    """Train an image classifier, evaluating and checkpointing every pass.

    Args:
        args: Options object. Reads ``model`` (``'se_resnext'`` selects
            ``SE_ResNeXt``, anything else ``mobile_net``) and
            ``with_mem_opt``.
        learning_rate: Base learning rate; used directly, or as the input of
            ``cosine_decay``.
        batch_size: Batch size for both the train and test readers.
        num_passes: Number of passes over the training reader.
        init_model: Directory of persistables to load before training, or
            ``None``.
        pretrained_model: Directory of pretrained variables; only variables
            with a same-named file in it are loaded.
        model_save_dir: Root directory for per-pass checkpoints, saved under
            ``<model_save_dir>/<args.model>/<pass_id>``.
        parallel: Build the network inside ``fluid.layers.ParallelDo``.
        use_nccl: Forwarded to ``ParallelDo``.
        lr_strategy: Optional dict holding a ``"piecewise_decay"`` entry
            (keys ``bd``, ``lr``) or a ``"cosine_decay"`` entry (keys
            ``step_each_epoch``, ``epochs``). ``None`` means a constant
            learning rate.
        layers: Forwarded to ``SE_ResNeXt``.
    """
    # The default of None would make the ``in`` tests below raise TypeError.
    if lr_strategy is None:
        lr_strategy = {}

    class_dim = 1000
    image_shape = [3, 224, 224]
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    def build_net(image_in, label_in):
        """Return (avg_cost, acc_top1, acc_top5) for the selected model."""
        # ``==`` rather than ``is``: identity comparison against a string
        # literal depends on interning and is not a value comparison.
        if args.model == 'se_resnext':
            out = SE_ResNeXt(input=image_in,
                             class_dim=class_dim,
                             layers=layers)
        else:
            out = mobile_net(img=image_in, class_dim=class_dim)

        cost = fluid.layers.cross_entropy(input=out, label=label_in)
        mean_cost = fluid.layers.mean(x=cost)
        top1 = fluid.layers.accuracy(input=out, label=label_in, k=1)
        top5 = fluid.layers.accuracy(input=out, label=label_in, k=5)
        return mean_cost, top1, top5

    if parallel:
        places = fluid.layers.device.get_places()
        pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)

        with pd.do():
            image_ = pd.read_input(image)
            label_ = pd.read_input(label)
            avg_cost, acc_top1, acc_top5 = build_net(image_, label_)
            pd.write_output(avg_cost)
            pd.write_output(acc_top1)
            pd.write_output(acc_top5)

        # Average the ParallelDo outputs down to one value per metric.
        avg_cost, acc_top1, acc_top5 = pd()
        avg_cost = fluid.layers.mean(x=avg_cost)
        acc_top1 = fluid.layers.mean(x=acc_top1)
        acc_top5 = fluid.layers.mean(x=acc_top5)
    else:
        avg_cost, acc_top1, acc_top5 = build_net(image, label)

    # Clone before the optimizer ops are added to the main program.
    inference_program = fluid.default_main_program().clone(for_test=True)

    if "piecewise_decay" in lr_strategy:
        bd = lr_strategy["piecewise_decay"]["bd"]
        lr = lr_strategy["piecewise_decay"]["lr"]
        lr_schedule = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
    elif "cosine_decay" in lr_strategy:
        step_each_epoch = lr_strategy["cosine_decay"]["step_each_epoch"]
        epochs = lr_strategy["cosine_decay"]["epochs"]
        lr_schedule = cosine_decay(learning_rate=learning_rate,
                                   step_each_epoch=step_each_epoch,
                                   epochs=epochs)
    else:
        lr_schedule = learning_rate

    optimizer = fluid.optimizer.Momentum(
        learning_rate=lr_schedule,
        momentum=0.9,
        regularization=fluid.regularizer.L2Decay(1e-4))

    optimizer.minimize(avg_cost)
    if args.with_mem_opt:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if init_model is not None:
        fluid.io.load_persistables(exe, init_model)

    if pretrained_model:

        def if_exist(var):
            # Only load variables that have a file of the same name on disk.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    train_reader = paddle.batch(reader.train(), batch_size=batch_size)
    test_reader = paddle.batch(reader.test(), batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    for pass_id in range(num_passes):
        # Per-batch [loss, top-1 accuracy, top-5 accuracy] histories.
        train_info = [[], [], []]
        test_info = [[], [], []]
        for batch_id, data in enumerate(train_reader()):
            t1 = time.time()
            loss, acc1, acc5 = exe.run(
                fluid.default_main_program(),
                feed=feeder.feed(data),
                fetch_list=[avg_cost, acc_top1, acc_top5])
            t2 = time.time()
            period = t2 - t1
            train_info[0].append(loss[0])
            train_info[1].append(acc1[0])
            train_info[2].append(acc5[0])
            if batch_id % 10 == 0:
                print("Pass {0}, trainbatch {1}, loss {2}, \
                       acc1 {3}, acc5 {4} time {5}".format(
                    pass_id, batch_id, loss[0], acc1[0], acc5[0],
                    "%2.2f sec" % period))
                sys.stdout.flush()

        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()
        # Enumerate so the log line reports the test batch index instead of
        # the stale index left over from the training loop.
        for batch_id, data in enumerate(test_reader()):
            t1 = time.time()
            loss, acc1, acc5 = exe.run(
                inference_program,
                feed=feeder.feed(data),
                fetch_list=[avg_cost, acc_top1, acc_top5])
            t2 = time.time()
            period = t2 - t1
            test_info[0].append(loss[0])
            test_info[1].append(acc1[0])
            test_info[2].append(acc5[0])
            if batch_id % 10 == 0:
                print("Pass {0},testbatch {1},loss {2}, \
                       acc1 {3},acc5 {4},time {5}".format(
                    pass_id, batch_id, loss[0], acc1[0], acc5[0],
                    "%2.2f sec" % period))
                sys.stdout.flush()

        test_loss = np.array(test_info[0]).mean()
        test_acc1 = np.array(test_info[1]).mean()
        test_acc5 = np.array(test_info[2]).mean()

        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, \
               test_loss {4}, test_acc1 {5}, test_acc5 {6}".format(
            pass_id, train_loss, train_acc1, train_acc5, test_loss,
            test_acc1, test_acc5))
        sys.stdout.flush()

        model_path = os.path.join(model_save_dir, args.model, str(pass_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(exe, model_path)
Beispiel #4
0
def main(train_data_file, test_data_file, vocab_file, target_file, emb_file,
         model_save_dir, num_passes, use_gpu, parallel):
    """Train the NER network and record accuracy/duration KPIs.

    Args:
        train_data_file: Training data path passed to ``reader.data_reader``.
        test_data_file: Test data path passed to ``reader.data_reader``.
        vocab_file: Word dictionary file loaded with ``load_dict``.
        target_file: Label dictionary file loaded with ``load_dict``.
        emb_file: Embedding file loaded with ``get_embedding``; its values
            are written into the ``'emb'`` parameter after start-up.
        model_save_dir: Directory created if it does not exist.
        num_passes: Number of training passes.
        use_gpu: Run on ``CUDAPlace(0)`` when true, otherwise on CPU.
        parallel: Forwarded to ``ner_net``.

    The command-line option ``gpu_card_num`` (from ``parse_args``) selects
    which pair of KPI recorders receives the final-pass results.
    """
    args = parse_args()
    if not os.path.exists(model_save_dir):
        os.mkdir(model_save_dir)

    BATCH_SIZE = 200
    word_dict = load_dict(vocab_file)
    label_dict = load_dict(target_file)

    word_vector_values = get_embedding(emb_file)

    word_dict_len = len(word_dict)
    label_dict_len = len(label_dict)

    avg_cost, feature_out, word, mark, target = ner_net(
        word_dict_len, label_dict_len, parallel)

    sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
    sgd_optimizer.minimize(avg_cost)

    crf_decode = fluid.layers.crf_decoding(
        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))

    chunk_evaluator = fluid.evaluator.ChunkEvaluator(
        input=crf_decode,
        label=target,
        chunk_scheme="IOB",
        num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))

    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        test_target = chunk_evaluator.metrics + chunk_evaluator.states
        inference_program = fluid.io.get_inference_program(test_target)

    train_reader = paddle.batch(
        reader.data_reader(train_data_file, word_dict, label_dict),
        batch_size=BATCH_SIZE, drop_last=False)
    test_reader = paddle.batch(
        reader.data_reader(test_data_file, word_dict, label_dict),
        batch_size=BATCH_SIZE, drop_last=False)

    place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
    feeder = fluid.DataFeeder(feed_list=[word, mark, target], place=place)
    exe = fluid.Executor(place)

    exe.run(fluid.default_startup_program())

    # Overwrite the initialized embedding table with the loaded vectors.
    embedding_name = 'emb'
    embedding_param = fluid.global_scope().find_var(
        embedding_name).get_tensor()
    embedding_param.set(word_vector_values, place)

    batch_id = 0
    total_time = 0.0
    # ``range`` instead of the Python-2-only ``xrange`` so the loop runs on
    # both Python 2 and Python 3.
    for pass_id in range(num_passes):
        chunk_evaluator.reset(exe)
        start_time = time.time()
        for data in train_reader():
            cost, batch_precision, batch_recall, batch_f1_score = exe.run(
                fluid.default_main_program(),
                feed=feeder.feed(data),
                fetch_list=[avg_cost] + chunk_evaluator.metrics)
            batch_id = batch_id + 1
        t1 = time.time()
        # Only the training loop is timed; evaluation below is excluded.
        total_time += t1 - start_time
        pass_precision, pass_recall, pass_f1_score = chunk_evaluator.eval(exe)
        if pass_id == num_passes - 1:
            # Record the final-pass precision and the mean pass duration.
            if args.gpu_card_num == 1:
                train_acc_kpi.add_record(pass_precision)
                pass_duration_kpi.add_record(total_time / num_passes)
            else:
                train_acc_kpi_card4.add_record(pass_precision)
                pass_duration_kpi_card4.add_record(total_time / num_passes)

        if pass_id % 100 == 0:
            print("[TrainSet] pass_id:" + str(pass_id) + " pass_precision:" +
                  str(pass_precision) + " pass_recall:" + str(
                      pass_recall) + " pass_f1_score:" + str(pass_f1_score))
        pass_precision, pass_recall, pass_f1_score = test(
            exe, chunk_evaluator, inference_program, test_reader, place)
        if pass_id % 100 == 0:
            print("[TestSet] pass_id:" + str(pass_id) + " pass_precision:" +
                  str(pass_precision) + " pass_recall:" + str(
                      pass_recall) + " pass_f1_score:" + str(pass_f1_score))

        #save_dirname = os.path.join(model_save_dir, "params_pass_%d" % pass_id)
        #fluid.io.save_inference_model(
        #    save_dirname, ['word', 'mark', 'target'], [crf_decode], exe)

    if args.gpu_card_num == 1:
        train_acc_kpi.persist()
        pass_duration_kpi.persist()
    else:
        train_acc_kpi_card4.persist()
        pass_duration_kpi_card4.persist()
Beispiel #5
0
def train(logger, args):
    """Train the reading-comprehension model and evaluate it on the dev set.

    Loads the pickled vocabulary and the dataset, builds the model and its
    optimizer inside a fresh program pair, initializes or restores the
    parameters, then trains with a ``fluid.ParallelExecutor`` for
    ``args.pass_num`` epochs. Validation runs every ``args.dev_interval``
    batches and after each epoch when a dev set is present; persistables are
    saved every ``args.save_interval`` epochs.

    Args:
        logger: Logger used for progress and result messages.
        args: Options object. Reads, among others, ``vocab_dir``,
            ``max_p_num``, ``max_p_len``, ``max_q_len``, ``trainset``,
            ``devset``, ``use_gpu``, ``enable_ce``, ``random_seed``,
            ``hidden_size``, ``optim``, ``learning_rate``, ``weight_decay``,
            ``load_dir``, ``pass_num``, ``batch_size``, ``log_interval``,
            ``dev_interval``, ``save_interval`` and ``save_dir``.
    """
    logger.info('Load data_set and vocab...')
    # NOTE(review): unpickling can execute arbitrary code; only load vocab
    # files from a trusted source.
    with open(os.path.join(args.vocab_dir, 'vocab.data'), 'rb') as fin:
        if six.PY2:
            vocab = pickle.load(fin)
        else:
            vocab = pickle.load(fin, encoding='bytes')
        logger.info('vocab size is {} and embed dim is {}'.format(
            vocab.size(), vocab.embed_dim))
    brc_data = BRCDataset(args.max_p_num, args.max_p_len, args.max_q_len,
                          args.trainset, args.devset)
    logger.info('Converting text into ids...')
    brc_data.convert_to_ids(vocab)
    logger.info('Initialize the model...')

    # Pick the device and the number of devices the batches are split over.
    if not args.use_gpu:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
    else:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()

    # build model
    main_program = fluid.Program()
    startup_prog = fluid.Program()
    # NOTE(review): startup_prog was created on the previous line and has no
    # ops yet -- confirm this memory_optimize call is intended.
    fluid.memory_optimize(startup_prog)
    if args.enable_ce:
        # Fixed seeds for the CE (enable_ce) runs.
        main_program.random_seed = args.random_seed
        startup_prog.random_seed = args.random_seed
    with fluid.program_guard(main_program, startup_prog):
        with fluid.unique_name.guard():
            avg_cost, s_probs, e_probs, match, feed_order = rc_model.rc_model(
                args.hidden_size, vocab, args)
            # clone from default main program and use it as the validation program
            inference_program = main_program.clone(for_test=True)

            # build optimizer
            if args.optim == 'sgd':
                optimizer = fluid.optimizer.SGD(
                    learning_rate=args.learning_rate)
            elif args.optim == 'adam':
                optimizer = fluid.optimizer.Adam(
                    learning_rate=args.learning_rate)
            # NOTE(review): the option value is 'rprop' but the optimizer
            # built is RMSProp -- confirm the option name is intentional.
            elif args.optim == 'rprop':
                optimizer = fluid.optimizer.RMSPropOptimizer(
                    learning_rate=args.learning_rate)
            else:
                logger.error('Unsupported optimizer: {}'.format(args.optim))
                exit(-1)
            # With weight decay the objective is the loss plus a scaled L2
            # term; obj_func is also the value fetched during training.
            if args.weight_decay > 0.0:
                obj_func = avg_cost + args.weight_decay * l2_loss(main_program)
                optimizer.minimize(obj_func)
            else:
                obj_func = avg_cost
                optimizer.minimize(obj_func)

            # initialize parameters
            # ``place`` is recomputed here from the same flag, via ``core``.
            place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace()
            exe = Executor(place)
            if args.load_dir:
                logger.info('load from {}'.format(args.load_dir))
                fluid.io.load_persistables(exe,
                                           args.load_dir,
                                           main_program=main_program)
            else:
                exe.run(startup_prog)
                # Overwrite the initialized embedding table with the
                # vocabulary's embeddings.
                embedding_para = fluid.global_scope().find_var(
                    'embedding_para_1').get_tensor()
                embedding_para.set(vocab.embeddings.astype(np.float32), place)
            # load elmo data
            # The directory name is hard-coded; ``if_exist`` is not defined
            # in this function and must come from the enclosing module.
            src_pretrain_model_path = '490001'
            fluid.io.load_vars(executor=exe,
                               dirname=src_pretrain_model_path,
                               predicate=if_exist,
                               main_program=main_program)
            # prepare data
            feed_list = [
                main_program.global_block().var(var_name)
                for var_name in feed_order
            ]
            feeder = fluid.DataFeeder(feed_list, place)

            logger.info('Training the model...')
            parallel_executor = fluid.ParallelExecutor(
                main_program=main_program,
                use_cuda=bool(args.use_gpu),
                loss_name=avg_cost.name)
            print_para(main_program, parallel_executor, logger, args)

            for pass_id in range(1, args.pass_num + 1):
                pass_start_time = time.time()
                pad_id = vocab.get_id(vocab.pad_token)
                # CE runs read the data unshuffled; normal runs shuffle.
                if args.enable_ce:
                    train_reader = lambda: brc_data.gen_mini_batches(
                        'train', args.batch_size, pad_id, shuffle=False)
                else:
                    train_reader = lambda: brc_data.gen_mini_batches(
                        'train', args.batch_size, pad_id, shuffle=True)
                # Presumably groups dev_count mini-batches per step, one per
                # device -- TODO confirm against read_multiple.
                train_reader = read_multiple(train_reader, dev_count)
                log_every_n_batch, n_batch_loss = args.log_interval, 0
                total_num, total_loss = 0, 0
                for batch_id, batch_list in enumerate(train_reader(), 1):
                    feed_data = batch_reader(batch_list, args)
                    fetch_outs = parallel_executor.run(
                        feed=list(feeder.feed_parallel(feed_data, dev_count)),
                        fetch_list=[obj_func.name],
                        return_numpy=False)
                    cost_train = np.array(fetch_outs[0]).mean()
                    # Each step is counted as batch_size * dev_count samples.
                    total_num += args.batch_size * dev_count
                    n_batch_loss += cost_train
                    total_loss += cost_train * args.batch_size * dev_count

                    # CE runs stop each epoch after 100 batches.
                    if args.enable_ce and batch_id >= 100:
                        break
                    if log_every_n_batch > 0 and batch_id % log_every_n_batch == 0:
                        print_para(main_program, parallel_executor, logger,
                                   args)
                        logger.info(
                            'Average loss from batch {} to {} is {}'.format(
                                batch_id - log_every_n_batch + 1, batch_id,
                                "%.10f" % (n_batch_loss / log_every_n_batch)))
                        n_batch_loss = 0
                    if args.dev_interval > 0 and batch_id % args.dev_interval == 0:
                        if brc_data.dev_set is not None:
                            eval_loss, bleu_rouge = validation(
                                inference_program, avg_cost, s_probs, e_probs,
                                match, feed_order, place, dev_count, vocab,
                                brc_data, logger, args)
                            logger.info('Dev eval loss {}'.format(eval_loss))
                            logger.info(
                                'Dev eval result: {}'.format(bleu_rouge))
                pass_end_time = time.time()
                time_consumed = pass_end_time - pass_start_time
                logger.info('epoch: {0}, epoch_time_cost: {1:.2f}'.format(
                    pass_id, time_consumed))
                logger.info(
                    'Evaluating the model after epoch {}'.format(pass_id))
                if brc_data.dev_set is not None:
                    eval_loss, bleu_rouge = validation(inference_program,
                                                       avg_cost, s_probs,
                                                       e_probs, match,
                                                       feed_order, place,
                                                       dev_count, vocab,
                                                       brc_data, logger, args)
                    logger.info('Dev eval loss {}'.format(eval_loss))
                    logger.info('Dev eval result: {}'.format(bleu_rouge))
                else:
                    logger.warning(
                        'No dev set is loaded for evaluation in the dataset!')

                # NOTE(review): total_num is 0 if the reader yielded no
                # batches, which makes this division fail -- confirm an empty
                # training set cannot occur.
                logger.info('Average train loss for epoch {} is {}'.format(
                    pass_id, "%.10f" % (1.0 * total_loss / total_num)))

                if pass_id % args.save_interval == 0:
                    model_path = os.path.join(args.save_dir, str(pass_id))
                    if not os.path.isdir(model_path):
                        os.makedirs(model_path)

                    fluid.io.save_persistables(executor=exe,
                                               dirname=model_path,
                                               main_program=main_program)
                if args.enable_ce:  # For CE
                    print("kpis\ttrain_cost_card%d\t%f" %
                          (dev_count, total_loss / total_num))
                    # eval_loss here is the value from the end-of-epoch
                    # validation above.
                    if brc_data.dev_set is not None:
                        print("kpis\ttest_cost_card%d\t%f" %
                              (dev_count, eval_loss))
                    print("kpis\ttrain_duration_card%d\t%f" %
                          (dev_count, time_consumed))
Beispiel #6
0
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)              # create an Executor instance, exe
exe.run(fluid.default_startup_program()) # Executor.run() executes the startup program to initialize the parameters


# **(2) Define the input data dimensions**
#
# DataFeeder converts the data returned by the data providers (train_reader, test_reader) into a special data structure that can be fed into the Executor.
#
# feed_list specifies the list of variables (or variable names) that are fed to the model.

# In[9]:


# Define the input data dimensions
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])# feed_list: the variables (or variable names) fed to the model


# **(3) Define draw_train_process, which plots how the loss changes during training**

# In[10]:


# Module-level state for the training plot: a counter and the two series.
# NOTE(review): ``iter`` shadows the builtin of the same name.
iter=0
iters=[]
train_costs=[]

def draw_train_process(iters,train_costs):
    """Set the title and x-axis label of the training-cost figure.

    NOTE(review): ``iters`` and ``train_costs`` are not used by the
    statements below -- confirm whether plotting code is missing here.
    """
    title="training cost"
    plt.title(title, fontsize=24)
    plt.xlabel("iter", fontsize=14)
Beispiel #7
0
    def create_network(self, is_infer=False):
        """Create the input variables and the network built on top of them.

        :param is_infer: When False, all four inputs (audio, text, sequence
                         length, masks) are declared and fed through a
                         ``DataLoader``. When True, the text input is
                         omitted and a ``DataFeeder`` is returned instead.
        :type is_infer: bool
        :return reader: ``DataLoader`` (training) or ``DataFeeder``
                        (inference) for the declared inputs.
        :return log_probs: First output of ``deep_speech_v2_network``, an
                           unnormalized log probability layer.
        :rtype log_probs: Variable
        :return loss: Second output of ``deep_speech_v2_network``, a CTC
                      loss layer.
        :rtype loss: Variable
        """
        if is_infer:
            # Inference has no transcript input.
            text_data = None
            audio_data = fluid.data(name='audio_data',
                                    shape=[None, 161, None],
                                    dtype='float32',
                                    lod_level=0)
            seq_len_data = fluid.data(name='seq_len_data',
                                      shape=[None, 1],
                                      dtype='int64',
                                      lod_level=0)
            masks = fluid.data(name='masks',
                               shape=[None, 32, 81, None],
                               dtype='float32',
                               lod_level=0)
            reader = fluid.DataFeeder([audio_data, seq_len_data, masks],
                                      self._place)
        else:
            # (name, shape, dtype, lod_level) for each training input, in
            # the order the variables are declared and fed.
            input_specs = [
                ('audio_data', [None, 161, None], 'float32', 0),
                ('text_data', [None, 1], 'int32', 1),
                ('seq_len_data', [None, 1], 'int64', 0),
                ('masks', [None, 32, 81, None], 'float32', 0),
            ]
            inputs = [
                fluid.data(name=var_name,
                           shape=var_shape,
                           dtype=var_dtype,
                           lod_level=var_lod)
                for var_name, var_shape, var_dtype, var_lod in input_specs
            ]
            reader = fluid.io.DataLoader.from_generator(feed_list=inputs,
                                                        capacity=64,
                                                        iterable=False,
                                                        use_double_buffer=True)
            audio_data, text_data, seq_len_data, masks = inputs

        log_probs, loss = deep_speech_v2_network(
            audio_data=audio_data,
            text_data=text_data,
            seq_len_data=seq_len_data,
            masks=masks,
            dict_size=self._vocab_size,
            num_conv_layers=self._num_conv_layers,
            num_rnn_layers=self._num_rnn_layers,
            rnn_size=self._rnn_layer_size,
            use_gru=self._use_gru,
            share_rnn_weights=self._share_rnn_weights)
        return reader, log_probs, loss
Beispiel #8
0
    def run_trainer(self, args):
        """Run ``RUN_STEP`` training steps and emit the pickled losses.

        Builds the model via ``self.get_model``, optionally memory-optimizes
        and transpiles the main program, trains with a
        ``fluid.ParallelExecutor`` and writes the pickled list of per-step
        losses to standard output.

        :param args: Options object. Reads ``batch_size``, ``mem_opt``,
                     ``is_dist``, ``trainer_id``, ``endpoints``,
                     ``trainers``, ``sync_mode``, ``dc_asgd``, ``use_cuda``,
                     ``batch_merge_repeat``, ``use_reduce`` and
                     ``use_reader_alloc``.
        """
        (test_program, avg_cost, train_reader, test_reader, batch_acc,
         predict) = self.get_model(batch_size=args.batch_size)

        if args.mem_opt:
            fluid.memory_optimize(fluid.default_main_program(),
                                  skip_grads=True)

        if not args.is_dist:
            trainer_prog = fluid.default_main_program()
        else:
            transpiler = self.get_transpiler(args.trainer_id,
                                             fluid.default_main_program(),
                                             args.endpoints, args.trainers,
                                             args.sync_mode, args.dc_asgd)
            trainer_prog = transpiler.get_trainer_program()

        place = fluid.CUDAPlace(0) if args.use_cuda else fluid.CPUPlace()

        startup_exe = fluid.Executor(place)
        startup_exe.run(fluid.default_startup_program())

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_threads = 1
        exec_strategy.allow_op_delay = False

        build_strategy = fluid.BuildStrategy()
        if args.batch_merge_repeat > 1:
            # Insert the batch-merge pass two positions before the end of
            # the pass list.
            pass_builder = build_strategy._create_passes_from_strategy()
            insert_at = len(pass_builder.all_passes()) - 2
            merge_pass = pass_builder.insert_pass(insert_at,
                                                  "multi_batch_merge_pass")
            merge_pass.set_int("num_repeats", args.batch_merge_repeat)

        build_strategy.reduce_strategy = (
            fluid.BuildStrategy.ReduceStrategy.Reduce
            if args.use_reduce else
            fluid.BuildStrategy.ReduceStrategy.AllReduce)

        exe = fluid.ParallelExecutor(args.use_cuda,
                                     loss_name=avg_cost.name,
                                     exec_strategy=exec_strategy,
                                     build_strategy=build_strategy)

        data_vars = [
            var for var in trainer_prog.global_block().vars.values()
            if var.is_data
        ]
        feeder = fluid.DataFeeder(data_vars, place)
        reader_generator = train_reader()

        def next_batch():
            """Return the next batch, or this trainer's share of it."""
            batch = next(reader_generator)
            if not (args.is_dist and args.use_reader_alloc):
                return batch
            # Keep the items whose position modulo 2 equals the trainer id.
            return [
                item for offset, item in enumerate(batch)
                if offset % 2 == args.trainer_id
            ]

        out_losses = []
        for _ in six.moves.xrange(RUN_STEP):
            fetched = exe.run(fetch_list=[avg_cost.name],
                              feed=feeder.feed(next_batch()))
            loss, = fetched
            out_losses.append(loss[0])

        # Python 3 needs the binary buffer to emit the raw pickle bytes.
        if not six.PY2:
            sys.stdout.buffer.write(pickle.dumps(out_losses))
        else:
            print(pickle.dumps(out_losses))
Beispiel #9
0
def compress(args):
    """Run quantization-aware training for ``args.model`` and export the result.

    The function builds a classification network on the default main program,
    rewrites the train and eval programs with ``quant_aware``, trains for
    ``args.num_epochs`` epochs (evaluating and saving persistables after each
    one), reloads the best checkpoint, freezes it with ``convert`` and finally
    saves a float and an int8 inference model.

    Args:
        args: Option namespace. Attributes read directly here: ``data``,
            ``model``, ``use_pact``, ``use_gpu``, ``pretrained_model``,
            ``batch_size``, ``log_period``, ``checkpoint_dir``,
            ``checkpoint_epoch``, ``total_images``, ``num_epochs`` and
            ``output_dir``. ``args`` is also handed to ``create_optimizer``.

    Raises:
        ValueError: If ``args.data`` is neither ``"mnist"`` nor ``"imagenet"``.
        AssertionError: If ``args.model`` is not in ``model_list``, if a
            pretrained model path is given but does not exist, or if
            ``checkpoint_dir`` is set without ``checkpoint_epoch``.
    """
    # 1. quantization configs
    quant_config = {
        # weight quantize type, default is 'channel_wise_abs_max'
        'weight_quantize_type': 'channel_wise_abs_max',
        # activation quantize type, default is 'moving_average_abs_max'
        'activation_quantize_type': 'moving_average_abs_max',
        # weight quantize bit num, default is 8
        'weight_bits': 8,
        # activation quantize bit num, default is 8
        'activation_bits': 8,
        # ops of name_scope in not_quant_pattern list, will not be quantized
        'not_quant_pattern': ['skip_quant'],
        # ops of type in quantize_op_types, will be quantized
        'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
        # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
        'dtype': 'int8',
        # window size for 'range_abs_max' quantization. default is 10000
        'window_size': 10000,
        # The decay coefficient of moving average, default is 0.9
        'moving_rate': 0.9,
    }

    # Pick the dataset readers plus the matching class count / input shape.
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    if args.use_pact:
        image.stop_gradient = False
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    # The eval program is cloned *before* the optimizer ops are appended, so
    # it contains the forward pass only.
    train_prog = fluid.default_main_program()
    val_program = fluid.default_main_program().clone(for_test=True)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    opt = create_optimizer(args)
    opt.minimize(avg_cost)

    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    # 2. quantization transform programs (training aware)
    #    Make some quantization transforms in the graph before training and testing.
    #    According to the weight and activation quantization type, the graph will be added
    #    some fake quantize operators and fake dequantize operators.

    if args.use_pact:
        act_preprocess_func = pact
        optimizer_func = get_optimizer
        executor = exe
    else:
        act_preprocess_func = None
        optimizer_func = None
        executor = None

    val_program = quant_aware(val_program,
                              place,
                              quant_config,
                              scope=None,
                              act_preprocess_func=act_preprocess_func,
                              optimizer_func=optimizer_func,
                              executor=executor,
                              for_test=True)
    compiled_train_prog = quant_aware(train_prog,
                                      place,
                                      quant_config,
                                      scope=None,
                                      act_preprocess_func=act_preprocess_func,
                                      optimizer_func=optimizer_func,
                                      executor=executor,
                                      for_test=False)

    # Pretrained weights are optional.  Validate the path only when one was
    # given: os.path.exists(None) raises TypeError, which previously made the
    # ``if`` below unreachable for the "no pretrained model" case.
    if args.pretrained_model:
        assert os.path.exists(
            args.pretrained_model), "pretrained_model doesn't exist"

        def if_exist(var):
            # Load only variables that have a file of the same name on disk.
            return os.path.exists(os.path.join(args.pretrained_model,
                                               var.name))

        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.fluid.io.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.fluid.io.batch(train_reader,
                                         batch_size=args.batch_size,
                                         drop_last=True)

    train_feeder = fluid.DataFeeder([image, label], place)
    # NOTE(review): val_feeder is constructed but test() below feeds through
    # train_feeder; kept as in the original -- confirm which one is intended.
    val_feeder = fluid.DataFeeder([image, label],
                                  place,
                                  program=val_program)

    def test(epoch, program):
        """Evaluate ``program`` on the validation reader; return mean top-1."""
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=train_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, compiled_train_prog):
        """Run one pass over the training reader on ``compiled_train_prog``."""
        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                compiled_train_prog,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])

            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))

            # With PACT enabled, dump the first element of every variable
            # whose name contains 'pact' every 1000 batches.
            if args.use_pact and batch_id % 1000 == 0:
                threshold = {}
                for var in val_program.list_vars():
                    if 'pact' in var.name:
                        array = np.array(fluid.global_scope().find_var(
                            var.name).get_tensor())
                        threshold[var.name] = array[0]
                print(threshold)

            batch_id += 1

    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = False
    build_strategy.fuse_all_reduce_ops = False
    build_strategy.sync_batch_norm = False
    exec_strategy = fluid.ExecutionStrategy()
    compiled_train_prog = compiled_train_prog.with_data_parallel(
        loss_name=avg_cost.name,
        build_strategy=build_strategy,
        exec_strategy=exec_strategy)

    # train loop
    best_acc1 = 0.0
    best_epoch = 0

    start_epoch = 0
    if args.checkpoint_dir is not None:
        # Resume: restore persistables and set the LR decay counter to the
        # number of steps already taken (epochs * batches per epoch).
        assert args.checkpoint_epoch is not None, "checkpoint_epoch must be set"
        start_epoch = args.checkpoint_epoch
        fluid.io.load_persistables(exe,
                                   dirname=args.checkpoint_dir,
                                   main_program=val_program)
        start_step = start_epoch * int(
            math.ceil(float(args.total_images) / args.batch_size))
        v = fluid.global_scope().find_var('@LR_DECAY_COUNTER@').get_tensor()
        v.set(np.array([start_step]).astype(np.float32), place)

    for i in range(start_epoch, args.num_epochs):
        train(i, compiled_train_prog)
        acc1 = test(i, val_program)
        fluid.io.save_persistables(exe,
                                   dirname=os.path.join(
                                       args.output_dir, str(i)),
                                   main_program=val_program)
        if acc1 > best_acc1:
            best_acc1 = acc1
            best_epoch = i
            fluid.io.save_persistables(exe,
                                       dirname=os.path.join(
                                           args.output_dir, 'best_model'),
                                       main_program=val_program)
    if os.path.exists(os.path.join(args.output_dir, 'best_model')):
        fluid.io.load_persistables(exe,
                                   dirname=os.path.join(
                                       args.output_dir, 'best_model'),
                                   main_program=val_program)
    # 3. Freeze the graph after training by adjusting the quantize
    #    operators' order for the inference.
    #    The dtype of float_program's weights is float32, but in int8 range.
    float_program, int8_program = convert(val_program, place, quant_config,
                                          scope=None,
                                          save_int8=True)
    print("eval best_model after convert")
    # Called for its logging side effect; the returned accuracy is not used.
    test(best_epoch, float_program)
    # 4. Save inference model
    model_path = os.path.join(
        quantization_model_save_dir, args.model,
        'act_' + quant_config['activation_quantize_type'] + '_w_' +
        quant_config['weight_quantize_type'])
    float_path = os.path.join(model_path, 'float')
    int8_path = os.path.join(model_path, 'int8')
    if not os.path.isdir(model_path):
        os.makedirs(model_path)

    fluid.io.save_inference_model(dirname=float_path,
                                  feeded_var_names=[image.name],
                                  target_vars=[out],
                                  executor=exe,
                                  main_program=float_program,
                                  model_filename=float_path + '/model',
                                  params_filename=float_path + '/params')

    fluid.io.save_inference_model(dirname=int8_path,
                                  feeded_var_names=[image.name],
                                  target_vars=[out],
                                  executor=exe,
                                  main_program=int8_program,
                                  model_filename=int8_path + '/model',
                                  params_filename=int8_path + '/params')
Beispiel #10
0
    def run_gpu_fleet_api_trainer(self, args):
        """Train RUN_STEP steps through the collective fleet API on one GPU.

        Builds the model with a ``DistributedStrategy``, runs the default
        startup program, feeds batches from the training reader into
        ``fleet.main_program`` and writes the pickled list of per-step losses
        to stdout.  When ``args.save_model`` is set, persistables and inference
        models are additionally saved under ``/tmp`` through both the
        ``fluid.io`` and the ``fleet`` APIs.

        Only ``args.update_method == "nccl2"`` is accepted (asserted).
        """
        assert args.update_method == "nccl2"

        self.lr = args.lr

        single_thread = fluid.ExecutionStrategy()
        single_thread.num_threads = 1

        strategy = DistributedStrategy()
        strategy.exec_strategy = single_thread
        strategy.fuse_memory_size = 1  # MB
        strategy.fuse_laryer_size = 1  # attribute spelling kept as in the original
        # Optional switches are only ever turned on, never explicitly off.
        if args.use_local_sgd:
            strategy.use_local_sgd = True
        if args.ut4grad_allreduce:
            strategy._ut4grad_allreduce = True
        if args.sync_batch_norm:
            strategy.sync_batch_norm = True

        fleet.init(role_maker.PaddleCloudRoleMaker(is_collective=True))
        print_to_err("gpu_fleet", "fleet.node_num:")
        # "fleet.node_id:", fleet.node_id(),
        # "fleet.trainer_num:", fleet.worker_num())

        (_test_program, avg_cost, train_reader, _test_reader, _batch_acc,
         _predict) = self.get_model(batch_size=args.batch_size,
                                    dist_strategy=strategy)

        origin_prog = fleet._origin_program
        fleet_prog = fleet.main_program

        gpu_index = int(os.getenv("FLAGS_selected_gpus", "0"))
        place = fluid.CUDAPlace(gpu_index)

        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        eprint(type(self).__name__, "run worker startup program done.")

        feed_var_list = []
        for candidate in origin_prog.global_block().vars.values():
            if candidate.is_data:
                feed_var_list.append(candidate)

        eprint("feed_var_list:", feed_var_list)

        # tmp add this code to pass python35 gcc8 CI
        # Fixme(gongweibao, wangxi), need fix fleet api program order
        if feed_var_list[0].name == 'label':
            feed_var_list = feed_var_list[::-1]

        feeder = fluid.DataFeeder(feed_var_list, place)
        batches = train_reader()

        def next_batch():
            # Pull one batch; with reader allocation in a non-local run, keep
            # only the samples whose offset parity equals this trainer's id.
            batch = next(batches)
            if args.update_method == "local" or not args.use_reader_alloc:
                return batch
            return [
                sample for position, sample in enumerate(batch)
                if position % 2 == args.trainer_id
            ]

        print_to_err(type(self).__name__, "begin to train on trainer")
        out_losses = []
        for step in six.moves.xrange(RUN_STEP):
            fetched = exe.run(fleet_prog,
                              fetch_list=[avg_cost.name],
                              feed=feeder.feed(next_batch()))
            loss, = fetched
            out_losses.append(loss[0])
            print_to_err(type(self).__name__, "run step %d finished" % step)
        print_to_err(type(self).__name__, "trainer run finished")

        # The losses go to stdout as a pickle: via print on Python 2, as raw
        # bytes on Python 3.
        serialized = pickle.dumps(out_losses)
        if not six.PY2:
            sys.stdout.buffer.write(serialized)
        else:
            print(serialized)

        if not args.save_model:
            return

        model_save_dir = "/tmp"
        # Worker 0 and every other worker write to distinct directory sets.
        if fleet.worker_index() == 0:
            dir_names = ("fluid_persistables", "fleet_persistables",
                         "fluid_infer", "fleet_infer")
        else:
            dir_names = ("fluid_persistables_2", "fleet_persistables_2",
                         "fluid_infer_2", "fleet_infer_2")
        (model_save_dir_fluid, model_save_dir_fleet, infer_save_dir_fluid,
         infer_save_dir_fleet) = [
             os.path.join(model_save_dir, dir_name) for dir_name in dir_names
         ]

        fluid.io.save_persistables(exe, model_save_dir_fluid,
                                   fleet._origin_program)
        fleet.save_persistables(executor=exe, dirname=model_save_dir_fleet)

        feeded_var_names = [feed_var.name for feed_var in feed_var_list]
        fluid.io.save_inference_model(infer_save_dir_fluid, feeded_var_names,
                                      [avg_cost], exe, fleet._origin_program)
        fleet.save_inference_model(exe, infer_save_dir_fleet,
                                   feeded_var_names, [avg_cost])
Beispiel #11
0
    def run_trainer(self, args):
        """Train RUN_STEP steps with a data-parallel compiled program.

        The trainer program depends on ``args.update_method``: transpiled for
        parameter-server mode ("pserver"), transpiled for NCCL2 ("nccl2" /
        "nccl2_reduce_layer"), or the untouched default main program for
        anything else.  The per-step losses are handed to ``print_to_out``.
        """
        self.lr = args.lr

        # Select the extra keyword for get_model(); batch_size is always given.
        if args.nccl2_reduce_layer_local_run:
            model_kwargs = {"single_device": True}
        elif args.use_dgc:
            model_kwargs = {"use_dgc": args.use_dgc}
        else:
            model_kwargs = {}
        (_test_program, avg_cost, train_reader, _test_reader, _batch_acc,
         _predict) = self.get_model(batch_size=args.batch_size, **model_kwargs)

        tag = type(self).__name__
        uses_nccl2 = (args.update_method == "nccl2"
                      or args.update_method == "nccl2_reduce_layer")

        if args.update_method == "pserver":
            print_to_err(
                tag, "begin to run transpile on trainer with pserver mode")
            transpiler = self.get_transpiler(
                trainer_id=args.trainer_id,
                main_program=fluid.default_main_program(),
                pserver_endpoints=args.endpoints,
                trainers=args.trainers,
                sync_mode=args.sync_mode,
                dc_asgd=args.dc_asgd,
                hogwild_mode=args.hogwild)

            trainer_prog = transpiler.get_trainer_program()
            print_to_err(tag, "get trainer program done with pserver mode.")
        elif uses_nccl2:
            # transpile for nccl2
            nccl2_config = fluid.DistributeTranspilerConfig()
            nccl2_config.mode = "nccl2"
            nccl2_config.nccl_comm_num = args.nccl_comm_num
            if args.use_hallreduce:
                nccl2_config.use_hierarchical_allreduce = True
                nccl2_config.hierarchical_allreduce_inter_nranks = \
                    args.hallreduce_inter_nranks
            print_to_err(
                tag, "begin to run transpile on trainer with nccl2 mode")
            nccl2_transpiler = fluid.DistributeTranspiler(config=nccl2_config)
            nccl2_transpiler.transpile(
                args.trainer_id,
                program=fluid.default_main_program(),
                startup_program=fluid.default_startup_program(),
                trainers=args.endpoints,
                current_endpoint=args.current_endpoint)
            print_to_err(tag, "get trainer program done. with nccl2 mode")
            trainer_prog = fluid.default_main_program()
        else:
            print_to_err(tag, "do nothing about main program, just use it")
            trainer_prog = fluid.default_main_program()
            print_to_err(tag, "use main program done.")

        # FIXME(gongwb):wait pserver initialization.
        time.sleep(1)

        if not args.use_cuda:
            place = fluid.CPUPlace()
        else:
            place = fluid.CUDAPlace(
                int(os.getenv("FLAGS_selected_gpus", "0")))

        exe = fluid.Executor(place)
        exe.run(fluid.default_startup_program())
        print_to_err(tag, "run worker startup program done.")

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.num_threads = 1

        build_stra = fluid.BuildStrategy()
        # FIXME force disable enable_inplace and memory_optimize
        build_stra.enable_inplace = False
        build_stra.memory_optimize = False

        if args.hogwild:
            build_stra.async_mode = True

        if args.enable_backward_deps:
            build_stra.enable_backward_optimizer_op_deps = True

        reduce_kinds = fluid.BuildStrategy.ReduceStrategy
        build_stra.reduce_strategy = (reduce_kinds.Reduce if args.use_reduce
                                      else reduce_kinds.AllReduce)

        pass_builder = None
        if args.batch_merge_repeat > 1:
            # Put the batch-merge pass first and tell it how often to repeat.
            pass_builder = build_stra._finalize_strategy_and_create_passes()
            merge_pass = pass_builder.insert_pass(0, "multi_batch_merge_pass")
            merge_pass.set("num_repeats", args.batch_merge_repeat)

        if uses_nccl2:
            build_stra.num_trainers = len(args.endpoints.split(","))
            build_stra.trainer_id = args.trainer_id
        else:
            # Every other update method is built as a single trainer with id 0.
            build_stra.num_trainers = 1
            build_stra.trainer_id = 0

        print_to_err(tag, "begin to compile with data parallel")
        binary = compiler.CompiledProgram(trainer_prog).with_data_parallel(
            loss_name=avg_cost.name,
            build_strategy=build_stra,
            exec_strategy=exec_strategy)
        print_to_err(tag, "program compiled with data parallel")

        feed_var_list = []
        for candidate in trainer_prog.global_block().vars.values():
            if candidate.is_data:
                feed_var_list.append(candidate)

        feeder = fluid.DataFeeder(feed_var_list, place)
        batches = train_reader()

        def next_batch():
            # Pull one batch; with reader allocation in a non-local run, keep
            # only the samples whose offset parity equals this trainer's id.
            batch = next(batches)
            if args.update_method == "local" or not args.use_reader_alloc:
                return batch
            return [
                sample for position, sample in enumerate(batch)
                if position % 2 == args.trainer_id
            ]

        print_to_err(tag, "begin to train on trainer")
        out_losses = []
        for step in six.moves.xrange(RUN_STEP):
            fetched = exe.run(binary,
                              fetch_list=[avg_cost.name],
                              feed=feeder.feed(next_batch()))
            loss, = fetched
            out_losses.append(loss[0])
            print_to_err(tag, "run step %d finished" % step)
        print_to_err(tag, "trainer run finished")

        print_to_out(out_losses)
Beispiel #12
0
def train_one_user(arg_dict, trainer_config):
    """Train the serialized program on one user's data and dump the results.

    The main/startup programs are parsed from ``trainer_config``, the global
    parameters from ``arg_dict`` are written into the global scope, and the
    user's data (fetched through ``DataClient``) is run for
    ``trainer_config["epoch"]`` epochs on CPU.  Afterwards every global
    parameter is saved with ``np.save`` under
    ``<write_global_param_file>/params/<name>`` and ``[uid, sample_count]`` is
    pickled to ``<write_global_param_file>/_info``.

    Args:
        arg_dict: Per-call arguments. Keys read: ``data_endpoints``, ``uid``,
            ``date``, ``global_param_names``, ``global_params``,
            ``write_global_param_file``.
        trainer_config: Training configuration. Keys read: ``show_metric``,
            ``shuffle``, ``max_training_steps``, ``batch_size``,
            ``main_program_desc``, ``startup_program_desc``, ``input_names``,
            ``epoch``, ``metrics``, ``target_names``, ``num_layers``,
            ``n_hidden`` and, optionally, ``max_steps_in_epoch`` (default -1,
            meaning no per-epoch step limit).
    """
    show_metric = trainer_config["show_metric"]
    shuffle = trainer_config["shuffle"]
    max_training_steps = trainer_config["max_training_steps"]  # read but not used below
    batch_size = trainer_config["batch_size"]
    # logging.info("training one user...")
    main_program = fluid.Program.parse_from_string(
        trainer_config["main_program_desc"])
    startup_program = fluid.Program.parse_from_string(
        trainer_config["startup_program_desc"])
    place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    scope = fluid.global_scope()
    # Validate both programs before either of them is used.
    if startup_program is None:
        logging.error("startup_program is None")
        exit()
    if main_program is None:
        logging.error("main_program is None")
        exit()
    exe.run(startup_program)

    feeder = fluid.DataFeeder(feed_list=trainer_config["input_names"],
                              place=place,
                              program=main_program)
    data_server_endpoints = arg_dict["data_endpoints"]
    # create data clients
    data_client = DataClient()
    data_client.set_data_server_endpoints(data_server_endpoints)
    uid = arg_dict["uid"]
    date = arg_dict["date"]
    user_data = data_client.get_data_by_uid(uid, date)
    train_reader = reader.train_reader(user_data)
    # The ``== True`` comparison is kept on purpose so that the set of config
    # values enabling shuffling stays exactly what it was.
    if shuffle == True:  # noqa: E712
        train_reader = paddle.reader.shuffle(train_reader, buf_size=10000)
    train_reader = paddle.batch(train_reader, batch_size=batch_size)

    # get user param
    # logging.debug("do not need to get user params")

    set_global_param_dict(arg_dict["global_param_names"],
                          arg_dict["global_params"], scope)

    epoch = trainer_config["epoch"]
    max_steps_in_epoch = trainer_config.get("max_steps_in_epoch", -1)
    metrics = trainer_config["metrics"]  # read but not used below
    fetch_list = list(trainer_config["target_names"])

    # Defined up front so the _info dump below also works when ``epoch`` is 0;
    # it is reset per epoch, so the dumped value is the last epoch's count.
    trained_sample_num = 0
    for ei in range(epoch):
        trained_sample_num = 0
        step = 0
        num_layers = trainer_config["num_layers"]
        hidden_size = trainer_config["n_hidden"]
        tot_loss, tot_correct = 0, 0
        tot_samples = 0
        init_hidden, init_cell = generate_init_data(batch_size, num_layers,
                                                    hidden_size)
        for data in train_reader():
            feed_data, input_lengths = prepare_input(batch_size, data,
                                                     init_hidden, init_cell)
            fetch_res = exe.run(main_program,
                                feed=feeder.feed(feed_data),
                                fetch_list=fetch_list)
            loss, last_hidden, last_cell, correct = fetch_res

            # The fetched last hidden/cell values become the next batch's
            # initial state.
            init_hidden = np.array(last_hidden)
            init_cell = np.array(last_cell)
            tot_loss += np.array(loss)
            tot_correct += np.array(correct)
            tot_samples += np.sum(input_lengths)
            step += 1
            trained_sample_num += len(data)
            if max_steps_in_epoch != -1 and step >= max_steps_in_epoch:
                break

        if show_metric and trained_sample_num > 0:
            loss = tot_loss / step
            acc = float(tot_correct) / tot_samples
            print("loss: {}, acc: {}".format(loss, acc))

    local_updated_param_dict = {}
    # update user param
    # logging.debug("do not need to update user params")

    data_client.set_param_by_uid(uid, local_updated_param_dict)
    # global_updated_param_dict = {}
    write_global_param_file = arg_dict["write_global_param_file"]
    #os.makedirs("%s/params" % write_global_param_file)
    for var_name in arg_dict["global_param_names"]:
        var = scope.var(var_name).get_tensor().__array__().astype(np.float32)
        filename = os.path.join(write_global_param_file, "params", var_name)
        #logging.info("filename: {}".format(filename))
        dirname = os.path.dirname(filename)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        # np.save emits bytes, so the file must be opened in binary mode.
        with open(filename, "wb") as f:
            np.save(f, var)
    # pickle.dump also writes bytes; text mode fails on Python 3.
    with open("%s/_info" % write_global_param_file, "wb") as f:
        pickle.dump([uid, trained_sample_num], f)
Beispiel #13
0
def infer_one_user(arg_dict, trainer_config):
    """Evaluate the serialized inference program on one user's data.

    The startup program is run on CPU, the global parameters from
    ``arg_dict`` are written into the global scope, and the user's data
    (fetched through ``DataClient``) is pushed through the inference program
    batch by batch.  The accuracy, computed as accumulated ``correct`` divided
    by the summed input lengths, is logged and written as ``"1\\t<acc>"`` to
    ``<infer_result_dir>/res``.

    Args:
        arg_dict: Keys read: ``uid``, ``data_endpoints``,
            ``global_param_names``, ``global_params``, ``date``,
            ``infer_result_dir`` (created with ``os.mkdir``).
        trainer_config: Keys read: ``batch_size``, ``startup_program_desc``,
            ``infer_program_desc``, ``input_names``, ``target_names``,
            ``num_layers``, ``n_hidden``.
    """
    # run startup program, set params
    uid = arg_dict["uid"]
    batch_size = trainer_config["batch_size"]
    startup_program = fluid.Program.parse_from_string(
        trainer_config["startup_program_desc"])
    infer_program = fluid.Program.parse_from_string(
        trainer_config["infer_program_desc"])
    cpu = fluid.CPUPlace()
    exe = fluid.Executor(cpu)
    scope = fluid.global_scope()

    if startup_program is None:
        logging.error("startup_program is None")
        exit()
    if infer_program is None:
        logging.error("infer_program is None")
        exit()

    exe.run(startup_program)

    data_client = DataClient()
    data_client.set_data_server_endpoints(arg_dict["data_endpoints"])

    # get user param
    # logging.debug("do not need to get user params")

    set_global_param_dict(arg_dict["global_param_names"],
                          arg_dict["global_params"], scope)

    # reader
    user_data = data_client.get_data_by_uid(uid, arg_dict["date"])
    batched_reader = paddle.batch(reader.infer_reader(user_data),
                                  batch_size=batch_size)

    # run infer program
    os.mkdir(arg_dict["infer_result_dir"])
    feeder = fluid.DataFeeder(feed_list=trainer_config["input_names"],
                              place=cpu,
                              program=infer_program)

    fetch_list = trainer_config["target_names"]

    num_layers = trainer_config["num_layers"]
    hidden_size = trainer_config["n_hidden"]
    tot_correct = 0
    tot_loss = 0
    tot_samples = 0
    tot_batches = 0
    init_hidden, init_cell = generate_init_data(batch_size, num_layers,
                                                hidden_size)
    for batch in batched_reader():
        feed_data, input_lengths = prepare_input(batch_size, batch,
                                                 init_hidden, init_cell)
        loss, last_hidden, last_cell, correct = exe.run(
            infer_program,
            feed=feeder.feed(feed_data),
            fetch_list=fetch_list)

        # The fetched last hidden/cell values seed the next batch.
        init_hidden = np.array(last_hidden)
        init_cell = np.array(last_cell)
        tot_loss += np.array(loss)
        tot_correct += np.array(correct)
        tot_samples += np.sum(input_lengths)
        tot_batches += 1

    # The mean loss is computed as before but is not reported anywhere.
    mean_loss = tot_loss / tot_batches
    acc = float(tot_correct) / tot_samples
    logging.info("infer acc: {}".format(acc))
    with open(arg_dict["infer_result_dir"] + "/res", "w") as result_file:
        result_file.write("%d\t%f\n" % (1, acc))
    def check_network_convergence(self, is_sparse, build_strategy=None):
        """Run 10 ParallelExecutor steps of a db_lstm + linear-chain-CRF net.

        Builds the network in a fresh main/startup program pair, trains it on
        GPU 0 with shuffled batches of 16 from ``paddle.dataset.conll05.test()``
        and prints the fetched average cost after every step.

        Args:
            is_sparse: Not used directly here; it reaches ``db_lstm`` through
                the ``**locals()`` call below.
            build_strategy: Optional build strategy forwarded to
                ``fluid.ParallelExecutor``.
        """
        main = fluid.Program()
        startup = fluid.Program()
        with fluid.program_guard(main, startup):
            word = fluid.layers.data(name='word_data',
                                     shape=[1],
                                     dtype='int64',
                                     lod_level=1)
            predicate = fluid.layers.data(name='verb_data',
                                          shape=[1],
                                          dtype='int64',
                                          lod_level=1)
            ctx_n2 = fluid.layers.data(name='ctx_n2_data',
                                       shape=[1],
                                       dtype='int64',
                                       lod_level=1)
            ctx_n1 = fluid.layers.data(name='ctx_n1_data',
                                       shape=[1],
                                       dtype='int64',
                                       lod_level=1)
            ctx_0 = fluid.layers.data(name='ctx_0_data',
                                      shape=[1],
                                      dtype='int64',
                                      lod_level=1)
            ctx_p1 = fluid.layers.data(name='ctx_p1_data',
                                       shape=[1],
                                       dtype='int64',
                                       lod_level=1)
            ctx_p2 = fluid.layers.data(name='ctx_p2_data',
                                       shape=[1],
                                       dtype='int64',
                                       lod_level=1)
            mark = fluid.layers.data(name='mark_data',
                                     shape=[1],
                                     dtype='int64',
                                     lod_level=1)

            # Every local defined so far (including self, is_sparse and
            # build_strategy) is passed to db_lstm by keyword, so the local
            # names above are part of the call and must not be renamed.
            feature_out = db_lstm(**locals())
            target = fluid.layers.data(name='target',
                                       shape=[1],
                                       dtype='int64',
                                       lod_level=1)
            crf_cost = fluid.layers.linear_chain_crf(
                input=feature_out,
                label=target,
                param_attr=fluid.ParamAttr(name='crfw', learning_rate=1e-1))
            avg_cost = fluid.layers.mean(crf_cost)

            sgd_optimizer = fluid.optimizer.SGD(
                learning_rate=fluid.layers.exponential_decay(
                    learning_rate=0.01,
                    decay_steps=100000,
                    decay_rate=0.5,
                    staircase=True))
            sgd_optimizer.minimize(avg_cost)

            train_data = paddle.batch(paddle.reader.shuffle(
                paddle.dataset.conll05.test(), buf_size=8192),
                                      batch_size=16)

            place = fluid.CUDAPlace(0)
            exe = fluid.Executor(place)
            exe.run(startup)

            pe = fluid.ParallelExecutor(use_cuda=True,
                                        loss_name=avg_cost.name,
                                        build_strategy=build_strategy)

            feeder = fluid.DataFeeder(feed_list=[
                word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark,
                target
            ],
                                      place=fluid.CPUPlace())

            data = train_data()
            # range() and the call form of print work on Python 2 and 3; the
            # former xrange / print statement were Python 2 only.
            for i in range(10):
                cur_batch = next(data)
                fetched = pe.run(feed=feeder.feed(cur_batch),
                                 fetch_list=[avg_cost.name])
                print(np.array(fetched[0]))
Beispiel #15
0
    # create loss
    learning_rate = fluid.layers.piecewise_decay(BOUNDARIES, LR_STEPS)  # case1, Tensor
    optimizer = fluid.optimizer.Adam(learning_rate=learning_rate,
                                     regularization=fluid.regularizer.L2Decay(
                                         regularization_coeff=REGULARIZATION_COEFF))
    optimizer.minimize(loss)

# feed data
# Two readers over the same CSV file: one for training and one created with
# is_val=True; both are given the same train_rate.
train_reader = reader(DATA_CSV, is_none_pre=NONE_PRE, train_rate=TRAIN_DATA_RATE)
val_reader = reader(DATA_CSV, is_none_pre=NONE_PRE, is_val=True, train_rate=TRAIN_DATA_RATE)
# Only the training reader is shuffled (buffer of 1024 samples); both readers
# are grouped into batches of BATCH_SIZE.
train_reader = fluid.io.batch(fluid.io.shuffle(train_reader, buf_size=1024), batch_size=BATCH_SIZE)
val_reader = fluid.io.batch(val_reader, batch_size=BATCH_SIZE)
# Names of the variables to feed, resolved against train_program by the
# DataFeeder instances below.
feed_list = ["ori_input_ids", "ori_position_ids", "ori_segment_ids", "ori_input_mask", "input_ids", "position_ids",
             "segment_ids", "input_mask", "scores"]
train_feeder = fluid.DataFeeder(feed_list=feed_list,
                                place=place,
                                program=train_program)
# NOTE(review): the validation feeder is also bound to train_program --
# confirm this is intended.
val_feeder = fluid.DataFeeder(feed_list=feed_list,
                              place=place,
                              program=train_program)


# define train
def controller_process(program, data_reader, feeder):
    global FIRST_FLAG, DATA_NUM
    infos = {"loss": [], "out": [], "label": []}
    for i, data in enumerate(data_reader()):
        info = controller.run(program=program,
                              feed=feeder.feed(data),
                              fetch_list=[loss, net, scores_label])
        try:
Beispiel #16
0
def main(dict_path):
    """Train the text-convolution classifier on IMDB, locally or distributed.

    The vocabulary is loaded from ``dict_path`` and extended with an
    ``"<unk>"`` entry. The network comes from ``conv_net`` and is optimised
    with SGD using ``conf.learning_rate``.

    When ``args.local`` is true the default main program is trained in this
    process. Otherwise the program is transpiled for parameter-server
    training and the process acts as the role named by ``TRAINING_ROLE``.

    Environment variables read in distributed mode:
        PADDLE_INIT_PSERVERS: comma-separated parameter-server IPs (required).
        PADDLE_INIT_PORT: port shared by all parameter servers (required).
        TRAINERS: total number of trainers (required).
        PADDLE_INIT_TRAINER_ID: id of this trainer, defaults to ``"0"``.
        POD_IP: IP of this process; required only for the PSERVER role.
        TRAINING_ROLE: ``"TRAINER"`` (default) or ``"PSERVER"``.

    Args:
        dict_path: path handed to ``load_vocab``.
    """
    word_dict = load_vocab(dict_path)
    word_dict["<unk>"] = len(word_dict)
    dict_dim = len(word_dict)
    print("The dictionary size is : %d" % dict_dim)

    data, label, prediction, avg_cost = conv_net(dict_dim)

    sgd_optimizer = fluid.optimizer.SGD(learning_rate=conf.learning_rate)
    optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)

    batch_size_var = fluid.layers.create_tensor(dtype='int64')
    batch_acc_var = fluid.layers.accuracy(input=prediction,
                                          label=label,
                                          total=batch_size_var)

    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc_var, batch_size_var])

    # The training data set.
    train_reader = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.imdb.train(word_dict), buf_size=51200),
                                batch_size=conf.batch_size)

    # The testing data set.
    test_reader = paddle.batch(paddle.reader.shuffle(
        paddle.dataset.imdb.test(word_dict), buf_size=51200),
                               batch_size=conf.batch_size)

    if conf.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()

    exe = fluid.Executor(place)

    feeder = fluid.DataFeeder(feed_list=[data, label], place=place)

    # exe.run(fluid.default_startup_program())

    train_pass_acc_evaluator = fluid.average.WeightedAverage()
    test_pass_acc_evaluator = fluid.average.WeightedAverage()

    def test(exe):
        """Return the size-weighted accuracy over one pass of the test set."""
        test_pass_acc_evaluator.reset()
        for batch in test_reader():
            # List comprehensions instead of map(): np.array(map(...)) would
            # wrap the lazy map object itself on Python 3.
            input_seq = to_lodtensor([sample[0] for sample in batch], place)
            y_data = np.array([sample[1] for sample in batch]).astype("int64")
            y_data = y_data.reshape([-1, 1])
            b_acc, b_size = exe.run(inference_program,
                                    feed={
                                        "words": input_seq,
                                        "label": y_data
                                    },
                                    fetch_list=[batch_acc_var, batch_size_var])
            test_pass_acc_evaluator.add(value=b_acc, weight=b_size)
        return test_pass_acc_evaluator.eval()

    def train_loop(exe, train_program, trainer_id):
        """Run ``conf.num_passes`` training passes, testing after each one."""
        total_time = 0.
        for pass_id in range(conf.num_passes):
            train_pass_acc_evaluator.reset()
            start_time = time.time()
            total_samples = 0
            #with profiler.profiler("CPU", 'total', profile_path='./profile_res_%d' % trainer_id) as prof:
            for batch_id, batch in enumerate(train_reader()):
                batch_start = time.time()
                cost_val, acc_val, size_val = exe.run(
                    train_program,
                    feed=feeder.feed(batch),
                    fetch_list=[avg_cost, batch_acc_var, batch_size_var])
                train_pass_acc_evaluator.add(value=acc_val, weight=size_val)
                total_samples += float(size_val)
                # Batch 0 is never logged because of the truthiness test.
                if batch_id and batch_id % conf.log_period == 0:
                    print(
                        "Pass id: %d, batch id: %d, cost: %f, pass_acc: %f, speed: %f, time: %f"
                        % (pass_id, batch_id, cost_val,
                           train_pass_acc_evaluator.eval(), float(size_val) /
                           (time.time() - batch_start),
                           time.time() - batch_start))
            end_time = time.time()
            total_time += (end_time - start_time)
            pass_test_acc = test(exe)
            print("Pass id: %d, test_acc: %f, speed: %f" %
                  (pass_id, pass_test_acc, total_samples /
                   (end_time - start_time)))
        print("Total train time: %f" % (total_time))

    if args.local:
        print("run as local mode")
        exe.run(fluid.default_startup_program())
        train_loop(exe, fluid.default_main_program(), 0)
    else:
        pserver_ips = os.getenv("PADDLE_INIT_PSERVERS")  # all pserver ips
        port = os.getenv("PADDLE_INIT_PORT")
        trainers_env = os.getenv("TRAINERS")  # total trainer count

        # Fail early with a readable message instead of an AttributeError /
        # TypeError raised deep inside the string handling below.
        missing = [
            name for name, value in (("PADDLE_INIT_PSERVERS", pserver_ips),
                                     ("PADDLE_INIT_PORT", port),
                                     ("TRAINERS", trainers_env)) if not value
        ]
        if missing:
            print("need env %s" % ", ".join(missing))
            exit(1)

        pserver_endpoints = ",".join(
            ':'.join([ip, port]) for ip in pserver_ips.split(","))
        print("pserver endpoints: ", pserver_endpoints)
        trainers = int(trainers_env)
        print("trainers total: ", trainers)
        trainer_id = int(os.getenv("PADDLE_INIT_TRAINER_ID", "0"))
        # Endpoint of this process; only a parameter server needs it, so a
        # missing POD_IP is reported in the PSERVER branch below.
        pod_ip = os.getenv("POD_IP")
        current_endpoint = pod_ip + ":" + port if pod_ip else None
        training_role = os.getenv(
            "TRAINING_ROLE",
            "TRAINER")  # get the training role: trainer/pserver
        t = fluid.DistributeTranspiler()
        t.transpile(optimize_ops,
                    params_grads,
                    trainer_id,
                    pservers=pserver_endpoints,
                    trainers=trainers)

        if training_role == "PSERVER":
            if not current_endpoint:
                print("need env POD_IP")
                exit(1)
            pserver_prog = t.get_pserver_program(current_endpoint)
            # Dump the transpiled program for debugging.
            with open("/tmp/pserver_prog", "w") as f:
                f.write(str(pserver_prog))
            print("######## pserver prog in /tmp/pserver_prog #############")
            pserver_startup = t.get_startup_program(current_endpoint,
                                                    pserver_prog)
            print("starting server side startup")
            exe.run(pserver_startup)
            print("starting parameter server...")
            exe.run(pserver_prog)
        elif training_role == "TRAINER":
            trainer_prog = t.get_trainer_program()
            # Dump the transpiled program for debugging.
            with open("/tmp/trainer_prog", "w") as f:
                f.write(str(trainer_prog))
            print("######## trainer prog in /tmp/trainer_prog #############")
            # TODO(typhoonzero): change trainer startup program to fetch parameters from pserver
            exe.run(fluid.default_startup_program())
            train_loop(exe, trainer_prog, trainer_id)
        else:
            print(
                "environment var TRAINING_ROLE should be TRAINER or PSERVER")
Beispiel #17
0
    crf_cost = fluid.layers.linear_chain_crf(
        input=score, label=tags, param_attr=fluid.ParamAttr(name="crfw")
    )

    avg_cost = fluid.layers.mean(crf_cost)

    crf_decode = fluid.layers.crf_decoding(
        input=score, param_attr=fluid.ParamAttr(name="crfw")
    )

    sgd_optimizer = fluid.optimizer.AdamOptimizer(learning_rate=0.01)

    sgd_optimizer.minimize(avg_cost)

    feeder = fluid.DataFeeder(place=place, feed_list=[words, tags])
    exe = fluid.Executor(place)

    exe.run(fluid.default_startup_program())

    save_dirname = "test.inference.model"
    main_program = fluid.default_main_program()

    PASS_NUM = 20
    for pass_id in range(PASS_NUM):
        print(">>> pass_id: {}".format(pass_id))
        for data in train_reader():
            feed = feeder.feed(data)

            avg_loss_value, = exe.run(
                main_program, feed=feed, fetch_list=[avg_cost], return_numpy=True
def eval(args):
    """Evaluate an image classifier and print its top-1 / top-5 accuracy.

    Builds the network named by ``args.model``, optionally loads the
    variables found under ``args.pretrained_model``, runs every batch from
    ``reader.test`` through the test program and compares the ranked
    predictions with the labels listed in ``args.img_list``.

    Args:
        args: namespace providing ``class_dim``, ``model``,
            ``pretrained_model``, ``image_shape`` (comma-separated ints),
            ``use_gpu``, ``batch_size`` and ``img_list``.

    Raises:
        AssertionError: if ``args.model`` is not in ``model_list``.
    """
    # parameters from arguments
    class_dim = args.class_dim
    model_name = args.model
    pretrained_model = args.pretrained_model
    image_shape = [int(m) for m in args.image_shape.split(",")]

    assert model_name in model_list, "{} is not in lists: {}".format(
        args.model, model_list)

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')

    # model definition
    model = models.__dict__[model_name]()

    # Compare by value: `is` tests object identity and only matched the
    # literal when the interpreter happened to intern both strings.
    if model_name == "GoogleNet":
        # GoogleNet's net() returns three outputs; only the first is used.
        out, _, _ = model.net(input=image, class_dim=class_dim)
    else:
        out = model.net(input=image, class_dim=class_dim)

    test_program = fluid.default_main_program().clone(for_test=True)

    fetch_list = [out.name]

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if pretrained_model:

        def if_exist(var):
            # Load only variables that have a file in the model directory.
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    test_batch_size = args.batch_size

    img_size = image_shape[1]
    test_reader = paddle.batch(reader.test(args, img_size),
                               batch_size=test_batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image])

    # The label is the last whitespace-separated field of each line. Blank
    # lines (e.g. a trailing newline) carry no label and are skipped instead
    # of raising IndexError.
    with open(args.img_list, 'r') as f:
        targets = [line.split()[-1] for line in f if line.strip()]
    # Builtin int replaces the np.int alias, which newer NumPy removed.
    targets = np.array(targets, dtype=int)

    preds = []
    TOPK = 5

    for batch_id, data in enumerate(test_reader()):
        all_result = exe.run(test_program,
                             fetch_list=fetch_list,
                             feed=feeder.feed(data))
        # Sort scores in descending order and keep the TOPK class indices.
        pred_label = np.argsort(-all_result[0], 1)[:, :TOPK]
        print("Test-{0}".format(batch_id))
        preds.append(pred_label)
    preds = np.vstack(preds)
    top1, top5 = accuracy(targets, preds)
    print("top1:{:.4f} top5:{:.4f}".format(top1, top5))
def main(args):
    """Train a matching model, validate periodically, save it and predict.

    Steps, in order:
      1. Build train / dev data generators from ``reader.MatchProcessor``.
      2. Build a training program (Adam with a noam-decay learning rate) and
         a separate test program cloned with ``for_test=True``.
      3. Train with a ParallelExecutor. Every ``args.skip_steps`` batches
         print a classification report and running loss / accuracy; every
         ``args.validation_steps`` batches evaluate on the dev set.
      4. Save the persistables under ``args.checkpoints/<last batch id>``.
      5. Print the second prediction column for the test and dev sets.

    Args:
        args: namespace with the data, model and schedule options read below
            (``task_name``, ``data_dir``, ``vocab_path``, ``max_seq_len``,
            ``do_lower_case``, ``batch_size``, ``epoch``, ``use_cuda``,
            ``warmup_proportion``, ``learning_rate``, ``skip_steps``,
            ``validation_steps``, ``checkpoints``). ``args.voc_size`` is
            set by this function.
    """

    task_name = args.task_name.lower()
    processor = reader.MatchProcessor(data_dir=args.data_dir,
                                      task_name=task_name,
                                      vocab_path=args.vocab_path,
                                      max_seq_len=args.max_seq_len,
                                      do_lower_case=args.do_lower_case)

    # Vocabulary size is the number of lines in the vocab file; the context
    # manager closes the handle that was previously leaked.
    with open(args.vocab_path, 'r') as vocab_file:
        args.voc_size = sum(1 for _ in vocab_file)
    num_labels = len(processor.get_labels())
    train_data_generator = processor.data_generator(batch_size=args.batch_size,
                                                    phase='train',
                                                    epoch=args.epoch,
                                                    shuffle=True)
    num_train_examples = processor.get_num_examples(phase='train')
    dev_data_generator = processor.data_generator(batch_size=args.batch_size,
                                                  phase='dev',
                                                  epoch=1,
                                                  shuffle=False)
    num_dev_examples = processor.get_num_examples(phase='dev')

    if args.use_cuda:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    max_train_steps = args.epoch * num_train_examples // args.batch_size
    warmup_steps = int(max_train_steps * args.warmup_proportion)

    train_program = fluid.Program()
    train_startup = fluid.Program()
    with fluid.program_guard(train_program, train_startup):
        with fluid.unique_name.guard():
            feed_order, loss, predict, accuracy, num_seqs, labels, softmax, prob, indexs1,logits = \
                    create_model(args, num_labels, \
                    is_prediction=False)
            lr_decay = fluid.layers.learning_rate_scheduler.noam_decay(
                256, warmup_steps)
            with fluid.default_main_program()._lr_schedule_guard():
                learning_rate = lr_decay * args.learning_rate
            optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
            optimizer.minimize(loss)

    # The test program is built under a fresh unique_name guard. Note that
    # this rebinds loss / predict / ... to the test program's variables; the
    # executors below fetch them by name only.
    test_program = fluid.Program()
    test_startup = fluid.Program()
    with fluid.program_guard(test_program, test_startup):
        with fluid.unique_name.guard():
            feed_order, loss, predict, accuracy, num_seqs, labels, softmax, prob, indexs1,logits = \
                    create_model(args, num_labels, \
                    is_prediction=True)
    test_program = test_program.clone(for_test=True)

    exe = Executor(place)
    exe.run(train_startup)
    exe.run(test_startup)

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = dev_count

    train_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                       loss_name=loss.name,
                                       exec_strategy=exec_strategy,
                                       main_program=train_program)

    test_exe = fluid.ParallelExecutor(use_cuda=args.use_cuda,
                                      main_program=test_program,
                                      share_vars_from=train_exe)

    feed_list = [
        train_program.global_block().var(var_name) for var_name in feed_order
    ]
    feeder = fluid.DataFeeder(feed_list, place)

    time_begin = time.time()
    total_cost, total_acc, total_num_seqs = [], [], []
    train_pred = []  # predicted class per sample since the last report
    train_true = []  # matching gold labels

    # Pre-bind so the checkpoint path below is defined even when the training
    # generator yields no batch at all.
    batch_id = 0
    for batch_id, data in enumerate(train_data_generator()):
        fetch_outs = train_exe.run(feed=feeder.feed(data),
                                   fetch_list=[
                                       loss.name, accuracy.name, num_seqs.name,
                                       predict.name, labels.name, softmax.name,
                                       logits.name
                                   ])
        avg_loss = fetch_outs[0]
        avg_acc = fetch_outs[1]
        cur_num_seqs = fetch_outs[2]
        total_cost.extend(avg_loss * cur_num_seqs)
        total_acc.extend(avg_acc * cur_num_seqs)
        total_num_seqs.extend(cur_num_seqs)
        batch_pred = fetch_outs[3]
        batch_true = fetch_outs[4]
        # Class 0 only when its score is strictly larger; ties go to class 1.
        train_pred.extend(0 if row[0] > row[1] else 1 for row in batch_pred)
        train_true.extend(
            batch_true[index] for index in range(len(batch_pred)))

        if batch_id % args.skip_steps == 0:
            print(fetch_outs[5][0:3])
            #print(fetch_outs[6])
            print(fetch_outs[6][0:3])
            print(classification_report(train_true, train_pred))
            train_pred = []
            train_true = []

            time_end = time.time()
            used_time = time_end - time_begin
            current_example, current_epoch = processor.get_train_progress()
            print("epoch: %d, progress: %d/%d, step: %d, ave loss: %f, "
                  "ave acc: %f, speed: %f steps/s" %
                  (current_epoch, current_example, num_train_examples,
                   batch_id, np.sum(total_cost) / np.sum(total_num_seqs),
                   np.sum(total_acc) / np.sum(total_num_seqs),
                   args.skip_steps / used_time))
            time_begin = time.time()
            total_cost, total_acc, total_num_seqs = [], [], []

        if batch_id % args.validation_steps == 0:
            total_dev_cost, total_dev_acc, total_dev_num_seqs = [], [], []
            dev_pred = []
            dev_true = []
            for dev_data in dev_data_generator():
                fetch_outs = test_exe.run(feed=feeder.feed(dev_data),
                                          fetch_list=[
                                              loss.name, accuracy.name,
                                              num_seqs.name, predict.name,
                                              labels.name
                                          ])
                avg_dev_loss = fetch_outs[0]
                avg_dev_acc = fetch_outs[1]
                cur_dev_num_seqs = fetch_outs[2]
                results = fetch_outs[3]
                act = fetch_outs[4]

                dev_pred.extend(0 if row[0] > row[1] else 1 for row in results)
                dev_true.extend(act[index] for index in range(len(results)))

                total_dev_cost.extend(avg_dev_loss * cur_dev_num_seqs)
                total_dev_acc.extend(avg_dev_acc * cur_dev_num_seqs)
                total_dev_num_seqs.extend(cur_dev_num_seqs)

            print(classification_report(dev_true, dev_pred))
            print("valid eval: ave loss: %f, ave acc: %f" %
                  (np.sum(total_dev_cost) / np.sum(total_dev_num_seqs),
                   np.sum(total_dev_acc) / np.sum(total_dev_num_seqs)))

    # The model is saved once, after training, under the last batch id
    # (periodic saving via args.save_steps is not enabled).
    model_path = os.path.join(args.checkpoints, str(batch_id))
    if not os.path.isdir(model_path):
        os.makedirs(model_path)
    fluid.io.save_persistables(executor=exe,
                               dirname=model_path,
                               main_program=train_program)

    # predict
    print("=================for predict===================")
    infer_data_generator = processor.data_generator(batch_size=args.batch_size,
                                                    phase='test',
                                                    epoch=1,
                                                    shuffle=False)
    for batch_id, data in enumerate(infer_data_generator()):
        results = test_exe.run(fetch_list=[predict.name],
                               feed=feeder.feed(data),
                               return_numpy=True)
        # Print the second column of every prediction row.
        for elem in results[0]:
            print(elem[1])

    print("=================for dev===================")
    infer_data_generator2 = processor.data_generator(
        batch_size=args.batch_size, phase='dev', epoch=1, shuffle=False)
    for batch_id, data in enumerate(infer_data_generator2()):
        results = test_exe.run(fetch_list=[predict.name],
                               feed=feeder.feed(data),
                               return_numpy=True)
        for elem in results[0]:
            print(elem[1])
Beispiel #20
0
def train():
    """Train the RCNN detection model configured by the global ``cfg``.

    Builds the model and its losses, sets up a Momentum optimiser with the
    warm-up / step-decay schedule, optionally loads pretrained variables,
    then trains either through the model's py_reader
    (``cfg.use_pyreader``) or through a DataFeeder. Snapshots are written
    every ``cfg.TRAIN.snapshot_iter`` iterations and a final model is saved
    as ``model_final``.

    When ``cfg.enable_ce`` is set, random seeds are fixed, shuffling and
    the model's ``use_random`` option are disabled, and KPI lines are
    printed at the end of training.
    """
    update_lr(cfg)
    learning_rate = cfg.learning_rate
    image_shape = [3, cfg.TRAIN.max_size, cfg.TRAIN.max_size]

    if cfg.enable_ce:
        fluid.default_startup_program().random_seed = 1000
        fluid.default_main_program().random_seed = 1000
        import random
        random.seed(0)
        np.random.seed(0)

    devices_num = get_device_num()
    total_batch_size = devices_num * cfg.TRAIN.im_per_batch

    # Randomness in the model is turned off for reproducible CE runs.
    use_random = not cfg.enable_ce
    model = model_builder.RCNN(
        add_conv_body_func=resnet.add_ResNet50_conv4_body,
        add_roi_box_head_func=resnet.add_ResNet_roi_conv5_head,
        use_pyreader=cfg.use_pyreader,
        use_random=use_random)
    model.build_model(image_shape)
    losses, keys = model.loss()
    loss = losses[0]
    fetch_list = losses

    # One learning-rate value per interval delimited by cfg.lr_steps, each
    # scaled by a further factor of gamma.
    boundaries = cfg.lr_steps
    gamma = cfg.lr_gamma
    step_num = len(cfg.lr_steps)
    values = [learning_rate * (gamma**i) for i in range(step_num + 1)]

    lr = exponential_with_warmup_decay(
        learning_rate=learning_rate,
        boundaries=boundaries,
        values=values,
        warmup_iter=cfg.warm_up_iter,
        warmup_factor=cfg.warm_up_factor)
    optimizer = fluid.optimizer.Momentum(
        learning_rate=lr,
        regularization=fluid.regularizer.L2Decay(cfg.weight_decay),
        momentum=cfg.momentum)
    optimizer.minimize(loss)
    # The learning rate is fetched last so the loops can log it as outs[-1].
    fetch_list = fetch_list + [lr]

    for var in fetch_list:
        var.persistable = True

    #fluid.memory_optimize(fluid.default_main_program(), skip_opt_set=set(fetch_list))
    gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0))
    place = fluid.CUDAPlace(gpu_id) if cfg.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if cfg.pretrained_model:
        def if_exist(var):
            # Load only variables that have a file in the model directory.
            return os.path.exists(os.path.join(cfg.pretrained_model, var.name))
        fluid.io.load_vars(exe, cfg.pretrained_model, predicate=if_exist)

    if cfg.parallel:
        build_strategy = fluid.BuildStrategy()
        build_strategy.memory_optimize = False
        build_strategy.enable_inplace = False

        if cfg.use_gpu:
            dist_utils.prepare_for_multi_process(
                exe,
                build_strategy,
                fluid.default_main_program(),
                fluid.default_startup_program())

        exec_strategy = fluid.ExecutionStrategy()
        exec_strategy.use_experimental_executor = True
        exec_strategy.num_iteration_per_drop_scope = 10
        train_exe = fluid.ParallelExecutor(use_cuda=bool(cfg.use_gpu),
                                           loss_name=loss.name,
                                           build_strategy=build_strategy,
                                           exec_strategy=exec_strategy)
    else:
        train_exe = exe

    shuffle = not cfg.enable_ce
    if cfg.use_pyreader:
        train_reader = reader.train(
            batch_size=cfg.TRAIN.im_per_batch,
            total_batch_size=total_batch_size,
            padding_total=cfg.TRAIN.padding_minibatch,
            shuffle=shuffle)
        py_reader = model.py_reader
        py_reader.decorate_paddle_reader(train_reader)
    else:
        train_reader = reader.train(
            batch_size=total_batch_size, shuffle=shuffle)
        feeder = fluid.DataFeeder(place=place, feed_list=model.feeds())

    def save_model(postfix):
        """Save all persistables under cfg.model_save_dir/postfix."""
        model_path = os.path.join(cfg.model_save_dir, postfix)
        # Replace any previous snapshot with the same name.
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        fluid.io.save_persistables(exe, model_path)

    def train_loop_pyreader():
        """Train for cfg.max_iter iterations, pulling data from py_reader."""
        py_reader.start()
        train_stats = TrainingStats(cfg.log_window, keys)
        try:
            start_time = time.time()
            # Keep the loop start separately: start_time is overwritten on
            # every iteration and would only time the last one.
            loop_start = start_time
            prev_start_time = start_time
            for iter_id in range(cfg.max_iter):
                prev_start_time = start_time
                start_time = time.time()
                outs = train_exe.run(fetch_list=[v.name for v in fetch_list])
                stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
                train_stats.update(stats)
                logs = train_stats.log()
                # The logged time is the duration of the previous iteration.
                strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                    now_time(), iter_id,
                    np.mean(outs[-1]), logs, start_time - prev_start_time)
                print(strs)
                sys.stdout.flush()
                if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                    save_model("model_iter{}".format(iter_id))
            end_time = time.time()
            total_time = end_time - loop_start
            last_loss = np.array(outs[0]).mean()
            if cfg.enable_ce:
                gpu_num = devices_num
                epoch_idx = iter_id + 1
                print("kpis\teach_pass_duration_card%s\t%s" %
                      (gpu_num, total_time / epoch_idx))
                print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, last_loss))
        except (StopIteration, fluid.core.EOFException):
            # Data ran out before cfg.max_iter iterations were completed.
            py_reader.reset()

    def train_loop():
        """Train from the feeder-based reader; return the last mean loss."""
        start_time = time.time()
        prev_start_time = start_time
        # Start of the whole loop, used for the total duration below.
        start = start_time
        train_stats = TrainingStats(cfg.log_window, keys)
        for iter_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()
            outs = train_exe.run(fetch_list=[v.name for v in fetch_list],
                                 feed=feeder.feed(data))
            stats = {k: np.array(v).mean() for k, v in zip(keys, outs[:-1])}
            train_stats.update(stats)
            logs = train_stats.log()
            # The logged time is the duration of the previous iteration.
            strs = '{}, iter: {}, lr: {:.5f}, {}, time: {:.3f}'.format(
                now_time(), iter_id,
                np.mean(outs[-1]), logs, start_time - prev_start_time)
            print(strs)
            sys.stdout.flush()
            if (iter_id + 1) % cfg.TRAIN.snapshot_iter == 0:
                save_model("model_iter{}".format(iter_id))
            if (iter_id + 1) == cfg.max_iter:
                break
        end_time = time.time()
        total_time = end_time - start
        last_loss = np.array(outs[0]).mean()
        # only for ce
        if cfg.enable_ce:
            gpu_num = devices_num
            epoch_idx = iter_id + 1
            print("kpis\teach_pass_duration_card%s\t%s" %
                  (gpu_num, total_time / epoch_idx))
            print("kpis\ttrain_loss_card%s\t%s" % (gpu_num, last_loss))

        # The original returned the mean of `every_pass_loss`, a name that is
        # not defined anywhere in this function, so the call always ended in
        # a NameError. The last mean loss is the value that is available.
        return last_loss

    if cfg.use_pyreader:
        train_loop_pyreader()
    else:
        train_loop()
    save_model('model_final')
Beispiel #21
0
# Script-level setup of one federated-learning trainer process.
trainer_id = int(sys.argv[1])  # trainer id for each guest
# Load this trainer's job description from the job configuration path.
job_path = "fl_job_config"
job = FLRunTimeJob()
job.load_trainer_job(job_path, trainer_id)
job._scheduler_ep = "127.0.0.1:9091"  # Inform the scheduler IP to trainer
print(job._target_names)
trainer = FLTrainerFactory().create_fl_trainer(job)
# Each trainer gets its own local port: 9000 + trainer_id.
trainer._current_ep = "127.0.0.1:{}".format(9000 + trainer_id)
place = fluid.CPUPlace()
trainer.start(place)
print(trainer._step)
# Evaluation copy of the trainer's main program.
test_program = trainer._main_program.clone(for_test=True)

# Input variables and the feeder that maps reader batches onto them.
img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace())


def train_test(train_test_program, train_test_feed, train_test_reader):
    """Return the mean of the per-batch accuracies over a reader.

    Every batch produced by ``train_test_reader()`` is converted with
    ``train_test_feed.feed`` and run through ``train_test_program`` on the
    module-level trainer's executor, fetching ``"accuracy_0.tmp_0"``.
    """
    batch_accuracies = [
        float(
            trainer.exe.run(program=train_test_program,
                            feed=train_test_feed.feed(batch),
                            fetch_list=["accuracy_0.tmp_0"])[0])
        for batch in train_test_reader()
    ]
    return numpy.array(batch_accuracies).mean()


# Counters initialised to zero; the code that advances them is not part of
# this excerpt.
epoch_id = 0
step = 0
Beispiel #22
0
def eval(args):
    """Evaluate a saved classifier and log top-1 / top-5 accuracy.

    ``args.data`` selects the dataset ("mnist" or "imagenet"), which fixes
    the class count and image shape. The network named by ``args.model`` is
    built, its weights are loaded from ``args.model_path`` and the
    validation reader is consumed in batches of ``args.batch_size``.
    Per-batch results are logged every ``args.log_period`` batches and the
    unweighted mean of the batch accuracies is logged at the end.

    Raises:
        ValueError: if ``args.data`` is not a supported dataset.
        AssertionError: if ``args.model`` is not in ``model_list``.
    """
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        class_dim, image_shape = 10, "1,28,28"
        train_reader = reader.train()
        val_reader = reader.test()
    elif args.data == "imagenet":
        import imagenet_reader as reader
        class_dim, image_shape = 1000, "3,224,224"
        train_reader = reader.train()
        val_reader = reader.val()
    else:
        raise ValueError("{} is not supported.".format(args.data))

    image_shape = list(map(int, image_shape.split(",")))
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)

    # Network inputs and the model under evaluation.
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    net_builder = models.__dict__[args.model]()
    out = net_builder.net(input=image, class_dim=class_dim)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    val_program = fluid.default_main_program().clone(for_test=True)

    if args.use_gpu:
        place = fluid.CUDAPlace(0)
    else:
        place = fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    val_reader = paddle.fluid.io.batch(val_reader, batch_size=args.batch_size)
    val_feeder = fluid.DataFeeder([image, label], place, program=val_program)

    load_model(exe, val_program, args.model_path)

    top1_history = []
    top5_history = []
    for batch_id, batch in enumerate(val_reader()):
        tic = time.time()
        acc_top1_n, acc_top5_n = exe.run(
            val_program,
            feed=val_feeder.feed(batch),
            fetch_list=[acc_top1.name, acc_top5.name])
        toc = time.time()
        top1_mean = np.mean(acc_top1_n)
        top5_mean = np.mean(acc_top5_n)
        if batch_id % args.log_period == 0:
            _logger.info(
                "Eval batch[{}] - acc_top1: {}; acc_top5: {}; time: {}".format(
                    batch_id, top1_mean, top5_mean, toc - tic))
        top1_history.append(top1_mean)
        top5_history.append(top5_mean)

    _logger.info("Final eval - acc_top1: {}; acc_top5: {}".format(
        np.mean(np.array(top1_history)), np.mean(np.array(top5_history))))
Beispiel #23
0
def validation(inference_program, avg_cost, s_probs, e_probs, match,
               feed_order, place, dev_count, vocab, brc_data, logger, args):
    """
    Run the inference program over the 'dev' split and score the answers.

    For every dev batch the loss and the start / end probabilities are
    fetched, the best answer per sample is selected with
    ``find_best_answer_for_inst`` and collected as a prediction record.
    Predictions are optionally written to a result file, one JSON object
    per line, when both ``args.result_dir`` and ``args.result_name`` are
    set.

    Returns:
        A tuple ``(ave_loss, bleu_rouge)``: the total fetched loss divided
        by the number of fetched loss entries, and the output of
        ``compute_bleu_rouge`` (``None`` when no sample carried reference
        answers).
    """
    # In-place and memory-optimisation passes are switched off for this
    # executor.
    build_strategy = fluid.BuildStrategy()
    build_strategy.enable_inplace = False
    build_strategy.memory_optimize = False
    parallel_executor = fluid.ParallelExecutor(main_program=inference_program,
                                               use_cuda=bool(args.use_gpu),
                                               loss_name=avg_cost.name,
                                               build_strategy=build_strategy)
    print_para(inference_program, parallel_executor, logger, args)

    # Use test set as validation each pass
    total_loss = 0.0
    count = 0
    # Running totals for the periodic log line; reset after each log.
    n_batch_cnt = 0
    n_batch_loss = 0.0
    pred_answers, ref_answers = [], []
    val_feed_list = [
        inference_program.global_block().var(var_name)
        for var_name in feed_order
    ]
    val_feeder = fluid.DataFeeder(val_feed_list, place)
    pad_id = vocab.get_id(vocab.pad_token)
    dev_reader = lambda: brc_data.gen_mini_batches(
        'dev', args.batch_size, pad_id, shuffle=False)
    # presumably groups dev_count mini-batches per step, one per device --
    # TODO confirm against read_multiple
    dev_reader = read_multiple(dev_reader, dev_count)

    # batch_id starts at 1 so the log ranges below are 1-based.
    for batch_id, batch_list in enumerate(dev_reader(), 1):
        feed_data = batch_reader(batch_list, args)
        val_fetch_outs = parallel_executor.run(
            feed=list(val_feeder.feed_parallel(feed_data, dev_count)),
            fetch_list=[avg_cost.name, s_probs.name, e_probs.name, match.name],
            return_numpy=False)
        total_loss += np.array(val_fetch_outs[0]).sum()
        start_probs_m = LodTensor_Array(val_fetch_outs[1])
        end_probs_m = LodTensor_Array(val_fetch_outs[2])
        match_lod = val_fetch_outs[3].lod()
        count += len(np.array(val_fetch_outs[0]))

        n_batch_cnt += len(np.array(val_fetch_outs[0]))
        n_batch_loss += np.array(val_fetch_outs[0]).sum()
        log_every_n_batch = args.log_interval
        if log_every_n_batch > 0 and batch_id % log_every_n_batch == 0:
            logger.info('Average dev loss from batch {} to {} is {}'.format(
                batch_id - log_every_n_batch + 1, batch_id,
                "%.10f" % (n_batch_loss / n_batch_cnt)))
            n_batch_loss = 0.0
            n_batch_cnt = 0
        # Offset of the current mini-batch's first sample within the fetched
        # outputs; advanced by batch_size after each mini-batch.
        batch_offset = 0
        for idx, batch in enumerate(batch_list):
            #one batch
            batch_size = len(batch['raw_data'])
            # batch_size + 1 boundaries from the first LoD level delimit
            # batch_size consecutive [start, end] pairs.
            batch_range = match_lod[0][batch_offset:batch_offset + batch_size +
                                       1]
            batch_lod = [[batch_range[x], batch_range[x + 1]]
                         for x in range(len(batch_range[:-1]))]
            # NOTE(review): these slices take batch_size + 1 entries; zip()
            # below stops at the shortest input, so the extra entry is not
            # consumed.
            start_prob_batch = start_probs_m[batch_offset:batch_offset +
                                             batch_size + 1]
            end_prob_batch = end_probs_m[batch_offset:batch_offset +
                                         batch_size + 1]
            for sample, start_prob_inst, end_prob_inst, inst_range in zip(
                    batch['raw_data'], start_prob_batch, end_prob_batch,
                    batch_lod):
                #one instance
                inst_lod = match_lod[1][inst_range[0]:inst_range[1] + 1]
                best_answer, best_span = find_best_answer_for_inst(
                    sample, start_prob_inst, end_prob_inst, inst_lod)
                # NOTE(review): the span is stored under 'yesno_answers' --
                # confirm this is intended.
                pred = {
                    'question_id': sample['question_id'],
                    'question_type': sample['question_type'],
                    'answers': [best_answer],
                    'entity_answers': [[]],
                    'yesno_answers': [best_span]
                }
                pred_answers.append(pred)
                # A reference record is added only for samples with answers.
                if 'answers' in sample:
                    ref = {
                        'question_id': sample['question_id'],
                        'question_type': sample['question_type'],
                        'answers': sample['answers'],
                        'entity_answers': [[]],
                        'yesno_answers': []
                    }
                    ref_answers.append(ref)
            batch_offset = batch_offset + batch_size

    result_dir = args.result_dir
    result_prefix = args.result_name
    if result_dir is not None and result_prefix is not None:
        if not os.path.exists(args.result_dir):
            os.makedirs(args.result_dir)
        # NOTE(review): 'json' is appended without a dot, so the file is named
        # "<result_name>json" -- confirm whether '.json' was intended.
        result_file = os.path.join(result_dir, result_prefix + 'json')
        with open(result_file, 'w') as fout:
            for pred_answer in pred_answers:
                fout.write(json.dumps(pred_answer, ensure_ascii=False) + '\n')
        logger.info('Saving {} results to {}'.format(result_prefix,
                                                     result_file))

    # NOTE(review): count stays 0 when the dev reader yields nothing, which
    # makes this division fail.
    ave_loss = 1.0 * total_loss / count
    # compute the bleu and rouge scores if reference answers is provided
    if len(ref_answers) > 0:
        pred_dict, ref_dict = {}, {}
        # NOTE(review): predictions and references are paired by position;
        # if only some samples carried 'answers' the two lists differ in
        # length and the pairing may be misaligned -- verify.
        for pred, ref in zip(pred_answers, ref_answers):
            question_id = ref['question_id']
            if len(ref['answers']) > 0:
                pred_dict[question_id] = normalize(pred['answers'])
                ref_dict[question_id] = normalize(ref['answers'])
        bleu_rouge = compute_bleu_rouge(pred_dict, ref_dict)
    else:
        bleu_rouge = None
    return ave_loss, bleu_rouge
def predict_infer(conf_dict, data_reader, predict_data_path,
                  predict_result_path, model_path):
    """
    Predict with trained models

    Loads the inference model stored under ``model_path``, runs it over the
    batches produced by ``data_reader.get_predict_reader`` and passes the
    collected ``text -> set((subject, predicate, object))`` mapping to
    ``output`` together with the result writer.

    Args:
        conf_dict: configuration mapping; only ``conf_dict['batch_size']`` is
            read here.
        data_reader: object providing ``get_predict_reader`` and
            ``get_label_output``.
        predict_data_path: path forwarded to ``get_predict_reader``.
        predict_result_path: file the results are written to; an empty string
            sends them to ``sys.stdout`` instead.
        model_path: location of the saved inference model; when empty the
            function returns without predicting (the result file, if any, has
            still been created/truncated, as before).

    Returns:
        None.
    """
    write_to_file = len(predict_result_path) > 0
    if write_to_file:
        result_writer = open(predict_result_path, 'w')
    else:
        result_writer = sys.stdout

    try:
        np.set_printoptions(precision=3)
        if len(model_path) == 0:
            return

        place = fluid.CPUPlace()
        word = fluid.layers.data(name='word_data',
                                 shape=[1],
                                 dtype='int64',
                                 lod_level=1)
        postag = fluid.layers.data(name='token_pos',
                                   shape=[1],
                                   dtype='int64',
                                   lod_level=1)
        p_word = fluid.layers.data(name='p_word',
                                   shape=[1],
                                   dtype='int64',
                                   lod_level=1)
        feeder = fluid.DataFeeder(feed_list=[word, postag, p_word],
                                  place=place)
        exe = fluid.Executor(place)

        test_batch_reader = paddle.batch(paddle.reader.buffered(
            data_reader.get_predict_reader(predict_data_path,
                                           need_input=True,
                                           need_label=False),
            size=8192),
                                         batch_size=conf_dict['batch_size'])
        inference_scope = fluid.core.Scope()
        text_spo_dic = {}  # final triples
        with fluid.scope_guard(inference_scope):
            [inference_program, feed_target_names, fetch_targets] = \
                fluid.io.load_inference_model(
                    model_path, exe, params_filename='params')

            # batch
            for data in test_batch_reader():
                # item[0] is the raw input line, the rest are model features.
                feeder_data = [item[1:] for item in data]
                input_data = [item[0] for item in data]
                results = exe.run(inference_program,
                                  feed=feeder.feed(feeder_data),
                                  fetch_list=fetch_targets,
                                  return_numpy=False)
                # Level-0 LoD offsets delimit each sentence's rows in the
                # flattened score matrix.
                tag_split_idx = results[0].lod()[0]
                label_tag_scores = np.array(results[0])
                # sentence
                for sent_idx, tag_idx in enumerate(tag_split_idx[:-1]):
                    # Raw line is tab separated: JSON sentence, then the
                    # predicate that becomes the middle of each triple.
                    fields = input_data[sent_idx].split('\t')
                    input_sent = fields[0]
                    input_p = fields[1]
                    tag_scores = label_tag_scores[
                        tag_idx:tag_split_idx[sent_idx + 1]]
                    # token
                    tag_list = [
                        data_reader.get_label_output(token_tags)
                        for token_tags in tag_scores
                    ]
                    predicted_s_list, predicted_o_list = refine_predict_seq(
                        input_sent, tag_list)
                    if len(predicted_s_list) == 0 or \
                            len(predicted_o_list) == 0:
                        continue
                    text = json.loads(input_sent)["text"]
                    spo_set = text_spo_dic.setdefault(text, set())
                    # Cross product of the de-duplicated subjects and objects.
                    for predicted_s in set(predicted_s_list):
                        for predicted_o in set(predicted_o_list):
                            spo_set.add((predicted_s, input_p, predicted_o))
        output(text_spo_dic, result_writer)
    finally:
        # Close the result file on every exit path (early return, exception,
        # normal completion); never close sys.stdout.
        if write_to_file:
            result_writer.close()
                                              regularization=l2)
# Append the backward pass and optimization ops for the average cost.
opts = optimizer.minimize(avg_cost)

# Load the CIFAR data, batched 32 samples at a time
train_reader = paddle.batch(cifar.train10(), batch_size=32)
test_reader = paddle.batch(cifar.test10(), batch_size=32)

# Define an executor that runs on the GPU
place = fluid.CUDAPlace(0)
# place = fluid.CPUPlace()
exe = fluid.Executor(place)
# Initialize the parameters
exe.run(fluid.default_startup_program())

# Define the feeder that maps each batch onto the image and label inputs
feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

# Define the starting positions for logging and get a parameter name
train_step = 0
test_step = 0
# Name of the first parameter listed in the startup program's global block;
# it is fetched alongside the cost and accuracy below.
params_name = fluid.default_startup_program().global_block().all_parameters(
)[0].name

# Train for 10 passes
for pass_id in range(10):
    # Run training
    for batch_id, data in enumerate(train_reader()):
        train_cost, train_acc, params = exe.run(
            program=fluid.default_main_program(),
            feed=feeder.feed(data),
            fetch_list=[avg_cost, acc, params_name])
Beispiel #26
0
def compress(args):
    """Build a classifier, then iteratively prune it with ``SensitivePruner``.

    The function builds the network selected by ``args.model`` for the
    dataset selected by ``args.data``, optionally loads pretrained variables,
    and then runs pruning iterations. Each iteration prunes, trains for one
    epoch, evaluates, and saves a checkpoint. The FLOPs of the original main
    program and of the final pruned validation program are printed at the end.

    Args:
        args: parsed options. Attributes read here are ``data`` ("mnist" or
            "imagenet"), ``model``, ``use_gpu``, ``pretrained_model``,
            ``batch_size``, ``log_period`` and ``checkpoints``; ``args`` is
            also forwarded to ``create_optimizer``.

    Returns:
        None.

    Raises:
        ValueError: if ``args.data`` is not a supported dataset.
        AssertionError: if ``args.model`` is not in ``model_list``.
    """
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))

    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    # Clone for evaluation before the optimizer ops are appended.
    val_program = fluid.default_main_program().clone(for_test=True)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:

        def if_exist(var):
            # Only load variables that have a file in the pretrained dir.
            return os.path.exists(os.path.join(args.pretrained_model,
                                               var.name))

        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.batch(train_reader,
                                batch_size=args.batch_size,
                                drop_last=True)

    train_feeder = fluid.DataFeeder([image, label], place)
    val_feeder = fluid.DataFeeder([image, label],
                                  place,
                                  program=val_program)

    def test(epoch, program):
        """Evaluate ``program`` on the validation set; return mean top-1."""
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            # Feed through the validation feeder (the training feeder was
            # used here before, leaving val_feeder unused).
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=val_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {:.3f}; acc_top5: {:.3f}; time: {:.3f}"
                    .format(epoch, batch_id, np.mean(acc_top1_n),
                            np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        _logger.info(
            "Final eval epoch[{}] - acc_top1: {:.3f}; acc_top5: {:.3f}".format(
                epoch, np.mean(np.array(acc_top1_ns)),
                np.mean(np.array(acc_top5_ns))))
        return np.mean(np.array(acc_top1_ns))

    def train(epoch, program):
        """Train ``program`` for one pass over the training reader."""
        build_strategy = fluid.BuildStrategy()
        exec_strategy = fluid.ExecutionStrategy()
        train_program = fluid.compiler.CompiledProgram(
            program).with_data_parallel(loss_name=avg_cost.name,
                                        build_strategy=build_strategy,
                                        exec_strategy=exec_strategy)

        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                train_program,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {:.3f}; acc_top1: {:.3f}; acc_top5: {:.3f}; time: {:.3f}"
                    .format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                            end_time - start_time))
            batch_id += 1

    # Only parameters whose name contains "_sep_weights" are pruned.
    params = [
        param.name
        for param in fluid.default_main_program().global_block().all_parameters()
        if "_sep_weights" in param.name
    ]

    def eval_func(program):
        return test(0, program)

    # For MNIST, train one epoch before any pruning.
    if args.data == "mnist":
        train(0, fluid.default_main_program())

    pruner = SensitivePruner(place, eval_func, checkpoints=args.checkpoints)
    # ``restore`` yields the checkpointed programs and the iteration to resume
    # from; the programs are None when there is nothing to restore.
    pruned_program, pruned_val_program, start_iter = pruner.restore()

    if pruned_program is None:
        pruned_program = fluid.default_main_program()
    if pruned_val_program is None:
        pruned_val_program = val_program

    end_iter = 6
    for prune_iter in range(start_iter, end_iter):
        # NOTE(review): 0.1 is presumably the per-iteration pruning target;
        # confirm against SensitivePruner.prune.
        pruned_program, pruned_val_program = pruner.prune(
            pruned_program, pruned_val_program, params, 0.1)
        train(prune_iter, pruned_program)
        test(prune_iter, pruned_val_program)
        pruner.save_checkpoint(pruned_program, pruned_val_program)

    print("before flops: {}".format(flops(fluid.default_main_program())))
    print("after flops: {}".format(flops(pruned_val_program)))
Beispiel #27
0
def train_parallel_exe(args,
                       learning_rate,
                       batch_size,
                       num_passes,
                       init_model=None,
                       pretrained_model=None,
                       model_save_dir='model',
                       parallel=True,
                       use_nccl=True,
                       lr_strategy=None,
                       layers=50):
    """Train and evaluate an image classifier with ``ParallelExecutor``.

    Builds SE-ResNeXt or MobileNet (chosen by ``args.model``), trains it on
    the flowers dataset for ``num_passes`` passes, evaluates after each pass
    and records the last pass' training metrics in the KPI trackers.

    Args:
        args: parsed options; ``args.model`` and ``args.with_mem_opt`` are
            read here.
        learning_rate: base learning rate (used by the cosine and constant
            schedules).
        batch_size: batch size for both the train and test readers.
        num_passes: number of passes over the training data.
        init_model: optional directory of persistables to resume from.
        pretrained_model: optional directory of pretrained variables; only
            variables with a matching file are loaded.
        model_save_dir: unused in this function.
        parallel: unused in this function.
        use_nccl: unused in this function.
        lr_strategy: optional mapping that may contain a "piecewise_decay"
            entry (with "bd" and "lr") or a "cosine_decay" entry (with
            "step_each_epoch" and "epochs"). ``None`` or a mapping without
            those keys selects a constant learning rate.
        layers: depth forwarded to ``SE_ResNeXt``.

    Returns:
        None.
    """
    # The default lr_strategy=None would raise TypeError on the `in` checks
    # below; treat it as "no schedule configured".
    if lr_strategy is None:
        lr_strategy = {}

    class_dim = 1000
    image_shape = [3, 224, 224]

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # Compare by value: `is` tests object identity and is unreliable for
    # strings that do not come from the same literal.
    if args.model == 'se_resnext':
        out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
    else:
        out = mobile_net(img=image, class_dim=class_dim)

    cost = fluid.layers.cross_entropy(input=out, label=label)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    avg_cost = fluid.layers.mean(x=cost)

    # Clone for evaluation before the optimizer ops are appended.
    test_program = fluid.default_main_program().clone(for_test=True)

    if "piecewise_decay" in lr_strategy:
        bd = lr_strategy["piecewise_decay"]["bd"]
        lr = lr_strategy["piecewise_decay"]["lr"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=fluid.layers.piecewise_decay(boundaries=bd,
                                                       values=lr),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    elif "cosine_decay" in lr_strategy:
        step_each_epoch = lr_strategy["cosine_decay"]["step_each_epoch"]
        epochs = lr_strategy["cosine_decay"]["epochs"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=cosine_decay(learning_rate=learning_rate,
                                       step_each_epoch=step_each_epoch,
                                       epochs=epochs),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    else:
        optimizer = fluid.optimizer.Momentum(
            learning_rate=learning_rate,
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))

    optimizer.minimize(avg_cost)

    if args.with_mem_opt:
        fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    # Set the seed on the program object; the previous code assigned the
    # attribute to the `default_startup_program` function itself, which had
    # no effect on the program.
    # NOTE(review): the seed is assigned after the network has been built;
    # confirm this is early enough to influence parameter initialisation.
    fluid.default_startup_program().random_seed = 1000
    exe.run(fluid.default_startup_program())

    if init_model is not None:
        fluid.io.load_persistables(exe, init_model)

    if pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    train_reader = paddle.batch(flowers.train(), batch_size=batch_size)
    test_reader = paddle.batch(flowers.test(), batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
    test_exe = fluid.ParallelExecutor(use_cuda=True,
                                      main_program=test_program,
                                      share_vars_from=train_exe)

    fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name]
    train_speed = []
    for pass_id in range(num_passes):
        # Per-pass [loss, acc1, acc5] histories.
        train_info = [[], [], []]
        test_info = [[], [], []]
        pass_time = 0
        pass_num = 0
        pass_speed = 0.0
        for batch_id, data in enumerate(train_reader()):
            t1 = time.time()
            loss, acc1, acc5 = train_exe.run(fetch_list,
                                             feed=feeder.feed(data))
            t2 = time.time()
            period = t2 - t1
            pass_time += period
            pass_num += len(data)
            loss = np.mean(np.array(loss))
            acc1 = np.mean(np.array(acc1))
            acc5 = np.mean(np.array(acc5))
            train_info[0].append(loss)
            train_info[1].append(acc1)
            train_info[2].append(acc5)
            if batch_id % 10 == 0:
                print("Pass {0}, trainbatch {1}, loss {2}, \
                       acc1 {3}, acc5 {4} time {5}"
                                                   .format(pass_id, \
                       batch_id, loss, acc1, acc5, \
                       "%2.2f sec" % period))
                sys.stdout.flush()

        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()
        # Samples per second, counting only time spent inside train_exe.run.
        pass_speed = pass_num / pass_time
        train_speed.append(pass_speed)
        if pass_id == num_passes - 1:
            train_acc_top1_kpi.add_record(train_acc1)
            train_acc_top5_kpi.add_record(train_acc5)
            train_cost_kpi.add_record(train_loss)
            # NOTE(review): this is the mean of the last pass' scalar speed,
            # not of the accumulated `train_speed` list; confirm which one
            # the KPI is meant to record before changing it.
            mean_pass_speed = np.array(pass_speed).mean()
            train_speed_kpi.add_record(mean_pass_speed)
        # Use a dedicated counter: the stale `batch_id` left over from the
        # training loop used to drive (and appear in) the test log lines.
        for test_batch_id, data in enumerate(test_reader()):
            t1 = time.time()
            loss, acc1, acc5 = test_exe.run(fetch_list, feed=feeder.feed(data))
            t2 = time.time()
            period = t2 - t1
            loss = np.mean(np.array(loss))
            acc1 = np.mean(np.array(acc1))
            acc5 = np.mean(np.array(acc5))
            test_info[0].append(loss)
            test_info[1].append(acc1)
            test_info[2].append(acc5)
            if test_batch_id % 10 == 0:
                print("Pass {0},testbatch {1},loss {2}, \
                       acc1 {3},acc5 {4},time {5}"
                                                  .format(pass_id, \
                       test_batch_id, loss, acc1, acc5, \
                       "%2.2f sec" % period))
                sys.stdout.flush()

        test_loss = np.array(test_info[0]).mean()
        test_acc1 = np.array(test_info[1]).mean()
        test_acc5 = np.array(test_info[2]).mean()

        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, \
               test_loss {4}, test_acc1 {5}, test_acc5 {6}, pass_time {7}, train_speed {8}"
                                                           .format(pass_id, \
              train_loss, train_acc1, train_acc5, test_loss, test_acc1, \
              test_acc5, pass_time, pass_num / pass_time))
        sys.stdout.flush()
    train_acc_top1_kpi.persist()
    train_acc_top5_kpi.persist()
    train_cost_kpi.persist()
    train_speed_kpi.persist()
Beispiel #28
0
def test_converter():
    """Feed two hand-written samples through a CPU ``DataFeeder`` and print
    the converted result.

    Each sample pairs a flat list of 784 values for the ``image`` input
    (declared with shape [1, 28, 28]) with a one-element ``label`` list.
    """
    image_var = fluid.layers.data(name='image', shape=[1, 28, 28])
    label_var = fluid.layers.data(name='label', shape=[1], dtype='int64')
    converter = fluid.DataFeeder([image_var, label_var], fluid.CPUPlace())
    samples = [
        [[0] * 784, [9]],
        [[1] * 784, [1]],
    ]
    converted = converter.feed(samples)
    print(converted)
# Append the backward pass for the clipped cost, registering the error-clip
# callback so it is invoked while the gradient ops are being added.
fluid.backward.append_backward(loss=avg_cost_clip,
                               callbacks=[fluid.clip.error_clip_callback])

# Look up the gradient variables ("<name>@GRAD") of both hidden layers in the
# plain program (prog) and in the clipped program (prog_clip).
hidden1_grad = prog.block(0).var(hidden1.name + "@GRAD")
hidden1_grad_clip = prog_clip.block(0).var(hidden1.name + "@GRAD")

hidden2_grad = prog.block(0).var(hidden2.name + "@GRAD")
hidden2_grad_clip = prog_clip.block(0).var(hidden2.name + "@GRAD")

# Shuffled MNIST training data (shuffle buffer of 8192 samples), batched.
train_reader = paddle.batch(paddle.reader.shuffle(paddle.dataset.mnist.train(),
                                                  buf_size=8192),
                            batch_size=BATCH_SIZE)

# Everything runs on the CPU; parameters are initialized by running the
# default startup program once.
place = fluid.CPUPlace()
exe = fluid.Executor(place)
feeder = fluid.DataFeeder(feed_list=[image, label], place=place)
exe.run(fluid.default_startup_program())

# Number of batches processed so far by the loop below.
count = 0
for data in train_reader():
    count += 1
    if count > 5:
        break
    out1, out2 = exe.run(prog,
                         feed=feeder.feed(data),
                         fetch_list=[hidden1_grad, hidden2_grad])
    out1_clip, out2_clip = exe.run(
        prog_clip,
        feed=feeder.feed(data),
        fetch_list=[hidden1_grad_clip, hidden2_grad_clip])
    if not ((out1.clip(min=CLIP_MIN, max=CLIP_MAX) == out1_clip).all() and
def train(avg_loss, infer_prog, optimizer, train_reader, test_reader,
          batch_acc, args, train_prog, startup_prog):
    """Run the single-device training loop of the benchmark.

    When the ``TRAINING_ROLE`` environment variable is ``"PSERVER"`` the
    function only runs ``startup_prog`` and ``train_prog`` on the CPU and
    returns. Otherwise it trains ``train_prog`` on the device selected by
    ``args.device``, printing the running mean loss after every iteration and
    the timing after each pass, optionally evaluates ``infer_prog``, and then
    calls ``exit(0)`` at the end of the first pass.

    Args:
        avg_loss: variable fetched as the loss on every iteration.
        infer_prog: program evaluated through ``test`` after a pass.
        optimizer: not used in this function.
        train_reader: callable returning a batch generator; used only when
            ``args.use_reader_op`` is false.
        test_reader: reader forwarded to ``test``.
        batch_acc: forwarded to ``test``; evaluation is skipped when falsy.
        args: parsed options. Attributes read here: ``use_fake_data``,
            ``device``, ``use_reader_op``, ``pass_num``, ``iterations``,
            ``skip_batch_num``, ``batch_size``, ``gpus``, ``no_test`` and
            ``use_inference_transpiler``.
        train_prog: the training program.
        startup_prog: the startup (initialization) program.

    Raises:
        Exception: if ``args.use_fake_data`` is set.
    """
    # Parameter-server role: run both programs on the CPU and return.
    if os.getenv("TRAINING_ROLE") == "PSERVER":
        place = core.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_prog)
        exe.run(train_prog)
        return

    if args.use_fake_data:
        raise Exception(
            "fake data is not supported in single GPU test for now.")

    place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    # Without the reader op, batches are fed from Python: collect the data
    # variables of the training program for the DataFeeder.
    # NOTE(review): `itervalues()` exists only on Python 2 dicts.
    if not args.use_reader_op:
        feed_var_list = [
            var for var in train_prog.global_block().vars.itervalues()
            if var.is_data
        ]
        feeder = fluid.DataFeeder(feed_var_list, place)

    iters, num_samples, start_time = 0, 0, time.time()
    for pass_id in range(args.pass_num):
        train_losses = []
        if not args.use_reader_op:
            reader_generator = train_reader()
        batch_id = 0
        data = None
        while True:
            if not args.use_reader_op:
                # `next` returns None once the generator is exhausted, which
                # ends the pass.
                data = next(reader_generator, None)
                if data == None:
                    break
            # Stop once the requested number of iterations has been run.
            if iters == args.iterations:
                break
            # Restart the timer and sample count after the skipped batches.
            if iters == args.skip_batch_num:
                start_time = time.time()
                num_samples = 0

            if args.use_reader_op:
                # With the reader op, an EnforceNotMet raised by the run ends
                # the pass (presumably the reader ran out of data; confirm).
                try:
                    loss = exe.run(train_prog, fetch_list=[avg_loss])
                except fluid.core.EnforceNotMet as ex:
                    break
            else:
                loss = exe.run(train_prog,
                               feed=feeder.feed(data),
                               fetch_list=[avg_loss])
            iters += 1
            batch_id += 1
            # FIXME(wuyi): For use_reader_op, if the current
            # pass is not the last, the last batch of this pass
            # is also equal to args.batch_size.
            if args.use_reader_op:
                num_samples += args.batch_size * args.gpus
            else:
                num_samples += len(data)
            train_losses.append(loss)
            # Reports the mean over all losses collected in this pass so far.
            print("Pass: %d, Iter: %d, Loss: %f\n" %
                  (pass_id, iters, np.mean(train_losses)))
        print_train_time(start_time, time.time(), num_samples)
        # NOTE(review): the trailing comma suppresses the newline under the
        # Python 2 print statement; with print as a function it only builds a
        # discarded tuple. Confirm which semantics this file relies on.
        print("Pass: %d, Loss: %f" % (pass_id, np.mean(train_losses))),
        # evaluation
        # `feeder` only exists when the reader op is not used, hence the
        # `not args.use_reader_op` condition.
        if not args.no_test and batch_acc and not args.use_reader_op:
            if args.use_inference_transpiler:
                t = fluid.InferenceTranspiler()
                t.transpile(infer_prog, place)

            pass_test_acc = test(exe, infer_prog, test_reader, feeder,
                                 batch_acc)
            print(", Test Accuracy: %f" % pass_test_acc)
        print("\n")
        # TODO(wuyi): add warmup passes to get better perf data.
        # Exits the process at the end of the first pass, so later passes of
        # the enclosing loop never run.
        exit(0)