Example #1
    def get_places(self):
        place_list = [fluid.cpu_places(1), fluid.cpu_places(4)]
        if fluid.is_compiled_with_cuda():
            place_list.extend(
                [fluid.cuda_places(0),
                 fluid.cuda_places([0, 1])])
        return place_list
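For reference, a minimal sketch (not from the example above) of what these place helpers return in Paddle 1.x:

import paddle.fluid as fluid

print(fluid.cpu_places(2))        # [CPUPlace, CPUPlace]
if fluid.is_compiled_with_cuda():
    print(fluid.cuda_places())    # every visible GPU, e.g. [CUDAPlace(0), CUDAPlace(1)]
    print(fluid.cuda_places(0))   # [CUDAPlace(0)]; an int or list of ints selects devices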
Example #2
def default_exe_params(is_distributed, use_cuda, thread_num):
    """
    Set the default execute parameters.
    """
    gpu_id = 0
    trainer_num = 1
    trainer_id = 0
    dist_strategy = None
    places = None
    if is_distributed:
        if use_cuda:
            role = role_maker.PaddleCloudRoleMaker(is_collective=True)
            fleet.init(role)

            gpu_id = int(os.getenv("FLAGS_selected_gpus"))
            trainer_num = fleet.worker_num()
            trainer_id = fleet.worker_index()

            exec_strategy = fluid.ExecutionStrategy()
            exec_strategy.use_experimental_executor = True
            exec_strategy.num_threads = 4
            exec_strategy.num_iteration_per_drop_scope = 1

            dist_strategy = DistributedStrategy()
            dist_strategy.exec_strategy = exec_strategy
            dist_strategy.nccl_comm_num = 2
            dist_strategy.fuse_all_reduce_ops = True

            dist_strategy.forward_recompute = True

            dist_strategy.use_amp = True
            dist_strategy.amp_loss_scaling = 12800.0

            places = fluid.cuda_places()
        else:
            print('Only gpu is supported for distributed mode at present.')
            exit(-1)
    else:
        if use_cuda:
            places = fluid.cuda_places()
        else:
            places = fluid.cpu_places(thread_num)
            os.environ['CPU_NUM'] = str(thread_num)

    if use_cuda:
        exe = fluid.Executor(fluid.CUDAPlace(gpu_id))
    else:
        exe = fluid.Executor(fluid.CPUPlace())

    return {
        'exe': exe,
        'trainer_num': trainer_num,
        'trainer_id': trainer_id,
        'gpu_id': gpu_id,
        'dist_strategy': dist_strategy,
        'places': places
    }
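A hedged usage sketch of the helper above; the argument values are illustrative, not from the source repo:

params = default_exe_params(is_distributed=False, use_cuda=False, thread_num=4)
exe = params['exe']        # fluid.Executor on a CPUPlace
places = params['places']  # 4 CPU places; CPU_NUM is set to "4"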
Example #3
    def __init__(self, model_type):
        self.model_type = model_type
        # All existing CV models have this attribute, and it is also needed at eval time
        self.num_classes = None
        self.labels = None
        self.version = paddlex.__version__
        if paddlex.env_info['place'] == 'cpu':
            self.places = fluid.cpu_places()
        else:
            self.places = fluid.cuda_places()
        self.exe = fluid.Executor(self.places[0])
        self.train_prog = None
        self.test_prog = None
        self.parallel_train_prog = None
        self.train_inputs = None
        self.test_inputs = None
        self.train_outputs = None
        self.test_outputs = None
        self.train_data_loader = None
        self.eval_metrics = None
        # If the model was loaded from an inference model, the training API cannot be used
        self.trainable = True
        # Whether to synchronize BatchNorm mean and variance across multiple cards
        self.sync_bn = False
        # Current model status
        self.status = 'Normal'
        # Number of completed epochs; used as the starting epoch when resuming training
        self.completed_epochs = 0
        self.scope = fluid.global_scope()

        # Thread pool used at prediction time to process input images in parallel,
        # mainly by the batch_predict API
        thread_num = mp.cpu_count() if mp.cpu_count() < 8 else 8
        self.thread_pool = mp.pool.ThreadPool(thread_num)
Example #4
def main(args):
    config = get_config(args.config, overrides=args.override, show=True)
    use_gpu = config.get("use_gpu", True)
    places = fluid.cuda_places() if use_gpu else fluid.cpu_places()

    startup_prog = fluid.Program()
    valid_prog = fluid.Program()
    valid_dataloader, valid_fetchs = program.build(config,
                                                   valid_prog,
                                                   startup_prog,
                                                   is_train=False,
                                                   is_distributed=False)
    valid_prog = valid_prog.clone(for_test=True)

    exe = fluid.Executor(places[0])
    exe.run(startup_prog)

    init_model(config, valid_prog, exe)

    valid_reader = Reader(config, 'valid')()
    valid_dataloader.set_sample_list_generator(valid_reader, places)

    compiled_valid_prog = program.compile(config, valid_prog)
    program.run(valid_dataloader, exe, compiled_valid_prog, valid_fetchs, -1,
                'eval')
Example #5
def main(args):
    import logging
    log.setLevel(logging.DEBUG)
    log.info("start")

    num_devices = len(F.cuda_places())
    model = DeepwalkModel(args.num_nodes, args.hidden_size, args.neg_num,
                          False, False, 1.)
    pyreader = model.pyreader
    loss = model.forward()

    train_steps = int(args.num_nodes * args.epoch / args.batch_size /
                      num_devices)
    optimization(args.lr * num_devices, loss, train_steps, args.optimizer)

    place = F.CUDAPlace(0)
    exe = F.Executor(place)
    exe.run(F.default_startup_program())

    graph = build_graph(args.num_nodes, args.edge_path)
    gen_func = build_gen_func(args, graph)

    pyreader.decorate_tensor_provider(gen_func)
    pyreader.start()

    train_prog = F.default_main_program()

    if args.warm_start_from_dir is not None:
        F.io.load_params(exe, args.warm_start_from_dir, train_prog)

    train_exe = get_parallel_exe(train_prog, loss)
    train(train_exe, exe, train_prog, loss, pyreader, args, train_steps)
Example #6
    def __init__(self, model_type):
        self.model_type = model_type
        # All existing CV models have this attribute, and it is also needed at eval time
        self.num_classes = None
        self.labels = None
        self.version = paddlex.__version__
        if paddlex.env_info['place'] == 'cpu':
            self.places = fluid.cpu_places()
        else:
            self.places = fluid.cuda_places()
        self.exe = fluid.Executor(self.places[0])
        self.train_prog = None
        self.test_prog = None
        self.parallel_train_prog = None
        self.train_inputs = None
        self.test_inputs = None
        self.train_outputs = None
        self.test_outputs = None
        self.train_data_loader = None
        self.eval_metrics = None
        # If the model was loaded from an inference model, the training API cannot be used
        self.trainable = True
        # Whether to synchronize BatchNorm mean and variance across multiple cards
        self.sync_bn = False
        # Current model status
        self.status = 'Normal'
        # Number of completed epochs; used as the starting epoch when resuming training
        self.completed_epochs = 0
Example #7
def _get_activations_from_ims(img, model, batch_size, dims, use_gpu,
                              premodel_path):
    n_batches = (len(img) + batch_size - 1) // batch_size
    n_used_img = len(img)

    pred_arr = np.empty((n_used_img, dims))

    for i in tqdm(range(n_batches)):
        start = i * batch_size
        end = start + batch_size
        if end > len(img):
            end = len(img)
        images = img[start:end]
        if images.shape[1] != 3:
            images = images.transpose((0, 3, 1, 2))
        images /= 255

        output, main_program, startup_program = _build_program(model)
        place = fluid.cuda_places()[0] if use_gpu else fluid.CPUPlace()
        exe = fluid.Executor(place)
        exe.run(startup_program)

        fluid.load(main_program,
                   os.path.join(premodel_path, 'paddle_inceptionv3'), exe)
        pred = exe.run(main_program,
                       feed={'images': images},
                       fetch_list=[output])[0]

        pred_arr[start:end] = pred.reshape(end - start, -1)

    return pred_arr
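The loop above rebuilds the program, re-runs the startup program, and reloads the InceptionV3 weights once per batch. A sketch of the same loop with one-time setup, reusing the names defined above:

    output, main_program, startup_program = _build_program(model)
    place = fluid.cuda_places()[0] if use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(startup_program)
    fluid.load(main_program,
               os.path.join(premodel_path, 'paddle_inceptionv3'), exe)

    for i in tqdm(range(n_batches)):
        start = i * batch_size
        end = min(start + batch_size, len(img))
        images = img[start:end]
        if images.shape[1] != 3:
            images = images.transpose((0, 3, 1, 2))
        images /= 255
        pred = exe.run(main_program,
                       feed={'images': images},
                       fetch_list=[output])[0]
        pred_arr[start:end] = pred.reshape(end - start, -1)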
Example #8
    def evaluate(self, eval_dataset, eval_hooks=[]):
        if not isinstance(eval_dataset, Dataset):
            raise ValueError(
                'expect dataset to be instance of Dataset, got %s' %
                repr(eval_dataset))
        program, model_spec = self.build_for_eval(eval_dataset)
        single_card_place = F.cuda_places()[0]
        eval_executor = F.Executor(single_card_place)

        eval_run_hooks = [
            hooks.StopAtStepHook(self.run_config.eval_max_steps,
                                 self.run_config.eval_max_steps),
            hooks.EvalHook(model_spec.metrics)
        ]
        # include caller-supplied hooks alongside the defaults
        eval_run_hooks.extend(eval_hooks)

        mon_exe = MonitoredExecutor(eval_executor,
                                    program,
                                    run_config=self.run_config,
                                    run_hooks=eval_run_hooks)
        mon_exe.init_or_restore_variables()

        try:
            with mon_exe:
                for data in eval_dataset.start(places=[single_card_place]):
                    mon_exe.run(feed=data)
        except (StopException, F.core.EOFException) as e:
            pass

        _, eval_result = mon_exe.result

        summary_writer = get_summary_writer(
            os.path.join(self.run_config.model_dir, 'eval_history'))
        log_eval_result('eval', eval_result, summary_writer, mon_exe.state)

        return mon_exe.result
Example #9
def train(use_cuda):
    # define program
    train_prog = fluid.Program()
    startup_prog = fluid.Program()
    with fluid.program_guard(train_prog, startup_prog):
        with fluid.unique_name.guard():
            # For training:
            # inputs = [src, src_sequence_length, trg, trg_sequence_length, label]
            inputs, loader = data_func(is_train=True)
            logits = model_func(inputs, is_train=True)
            loss = loss_func(logits, inputs[-1], inputs[-2])
            optimizer = optimizer_func()
            optimizer.minimize(loss)

    # define data source
    places = fluid.cuda_places() if use_cuda else fluid.cpu_places()
    loader.set_batch_generator(inputs_generator(batch_size,
                                                eos_id,
                                                is_train=True),
                               places=places)

    exe = fluid.Executor(places[0])
    exe.run(startup_prog)
    prog = fluid.CompiledProgram(train_prog).with_data_parallel(
        loss_name=loss.name)

    EPOCH_NUM = 20
    for pass_id in six.moves.xrange(EPOCH_NUM):
        batch_id = 0
        for data in loader():
            loss_val = exe.run(prog, feed=data, fetch_list=[loss])[0]
            print('pass_id: %d, batch_id: %d, loss: %f' %
                  (pass_id, batch_id, loss_val))
            batch_id += 1
        fluid.io.save_params(exe, model_save_dir, main_program=train_prog)
Example #10
    def __init__(self,
                 sent_emb_dim,
                 word_emb_dim,
                 sent_len,
                 lr=0.001,
                 bidirectional=False,
                 dropout_prob=None,
                 num_layers=1,
                 use_gpu=True,
                 emb_size_ratio=1.5):
        super(SkipThoughts, self).__init__(sent_len=sent_len)
        self.sent_emb_dim = sent_emb_dim
        self.word_emb_dim = word_emb_dim
        self.lr = lr
        self.bidirectional = bidirectional
        self.dropout_prob = dropout_prob
        self.num_layers = num_layers
        self.place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
        self.dataloader_places = fluid.cuda_places(
        ) if use_gpu else fluid.cpu_places(8)
        self.built = False
        self.test_emb_pin = 0
        self.init = False
        self.test_fitted = False
        self.emb_size_ratio = emb_size_ratio
Example #11
    def predict(self, predict_dataset, ckpt=None, steps=-1, split_batch=True):
        '''
        Perform prediction.
        Will call `model_fn` and initialize the user-specified model in `propeller.RunMode.PREDICT` mode.

        Args:
            predict_dataset (propeller.data.Dataset): should not `shuffle` or `repeat`
            steps (int): steps to predict; if -1 is specified, will stop when `StopException` is raised in `predict_dataset`
            split_batch (bool): if True, prediction of each example in a batch is returned.

        Yields:
            Evaluated values of predictions tensors.

        '''
        if not isinstance(predict_dataset, Dataset):
            raise ValueError(
                'expect dataset to be instance of Dataset, got %s' %
                repr(predict_dataset))

        program, model_spec = self.build_for_predict(predict_dataset)
        single_card_place = F.cuda_places()[0]
        executor = F.Executor(single_card_place)
        pred_run_config = RunConfig(run_steps=None if steps == -1 else steps,
                                    model_dir=self.run_config.model_dir)
        mon_exe = MonitoredExecutor(
            executor,
            program,
            run_config=pred_run_config,
        )
        mon_exe.init_or_restore_variables()
        try:
            with mon_exe:
                log.info('Running predict from dir: %s' % repr(mon_exe.state))
                for data in predict_dataset.start(places=[single_card_place]):
                    res = mon_exe.run(fetch_list=model_spec.predictions,
                                      feed=data)
                    if split_batch:
                        res = map(lambda i: i.tolist(), res)
                        res = zip(*res)  # transpose
                        for r in res:
                            yield r
                    else:
                        yield list(map(lambda i: i.tolist(), res))
        except (StopException, F.core.EOFException) as e:
            pass
Example #12
def serve(model_dir, host, num_concurrent=None):
    if six.PY2:
        raise RuntimeError('propeller service work in python3 only')
    num_worker = len(
        F.cuda_places()) if num_concurrent is None else num_concurrent
    pool = ThreadPoolExecutor(num_worker)

    class Predictor(object):
        def __init__(self, did):
            log.debug('create predictor on card %d' % did)
            config = F.core.AnalysisConfig(model_dir)
            config.enable_use_gpu(5000, did)
            self._predictor = F.core.create_paddle_predictor(config)

        @profile('paddle')
        def __call__(self, args):
            for i, a in enumerate(args):
                a.name = 'placeholder_%d' % i
            res = self._predictor.run(args)
            return res

    predictor_context = {}

    class InferenceService(interface_pb2_grpc.InferenceServicer):
        @profile('service')
        def Infer(self, request, context):
            try:
                slots = request.slots
                current_thread = threading.current_thread()
                log.debug('%d slots received dispatch to thread %s' %
                          (len(slots), current_thread))
                if current_thread not in predictor_context:
                    did = list(pool._threads).index(current_thread)
                    log.debug('spawning worker thread %d' % did)
                    predictor = Predictor(did)
                    predictor_context[current_thread] = predictor
                else:
                    predictor = predictor_context[current_thread]
                slots = [serv_utils.slot_to_paddlearray(s) for s in slots]
                ret = predictor(slots)
                response = [serv_utils.paddlearray_to_slot(r) for r in ret]
            except Exception as e:
                log.exception(e)
                raise e
            return interface_pb2.Slots(slots=response)

    server = grpc.server(pool)
    interface_pb2_grpc.add_InferenceServicer_to_server(InferenceService(),
                                                       server)
    server.add_insecure_port(host)
    server.start()
    log.info('server started on %s...' % host)
    try:
        while True:
            sleep(100000)
    except KeyboardInterrupt as e:
        pass
    log.info('server stopped...')
Example #13
def train_static_graph(epoch_num, use_multi_gpu):
    resnet = ResNet()
    image = fluid.data(name='image',
                       shape=[None] + train_parameters['input_size'],
                       dtype='float32')
    label = fluid.data(name='label', shape=[None], dtype='int64')
    out = resnet(image)
    loss = fluid.layers.cross_entropy(out, label)
    avg_loss = fluid.layers.mean(loss)
    optimizer = optimizer_setting(train_parameters,
                                  parameter_list=resnet.parameters())
    optimizer.minimize(avg_loss)

    program = fluid.default_main_program()
    if use_multi_gpu:
        program = fluid.CompiledProgram(program).with_data_parallel(
            loss_name=avg_loss.name)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)

    exe.run(fluid.default_startup_program())
    image_shape = train_parameters['input_size']

    reader = create_reader()

    def single_device_feed_reader(reader):
        def __impl__():
            for data in reader():
                image_np = np.array(
                    [np.reshape(x[0], image_shape) for x in data])
                label_np = np.array([x[1] for x in data])
                yield {image.name: image_np, label.name: label_np}

        return __impl__

    reader = single_device_feed_reader(reader)
    if use_multi_gpu:
        reader = paddle.batch(reader,
                              batch_size=len(fluid.cuda_places()),
                              drop_last=True)

    for epoch_id in six.moves.range(epoch_num):
        for i, data in enumerate(reader()):
            avg_loss_val, = exe.run(program, feed=data, fetch_list=[avg_loss])

            if i % 10 == 0:
                print('Epoch {}, batch {}, avg_loss {}'.format(
                    epoch_id, i, avg_loss_val))

    fluid.io.save_inference_model('./infer_static_graph',
                                  feeded_var_names=[image.name],
                                  target_vars=[out],
                                  executor=exe)
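The paddle.batch(..., batch_size=len(fluid.cuda_places())) wrapping above exists because with_data_parallel accepts a list of feed dicts, one per device. A hedged sketch of the structure one multi-GPU step receives (shapes illustrative):

import numpy as np

num_cards = 2  # stand-in for len(fluid.cuda_places())
feed = [{'image': np.zeros([32, 3, 224, 224], dtype='float32'),
         'label': np.zeros([32], dtype='int64')}
        for _ in range(num_cards)]  # one feed dict per card
assert len(feed) == num_cards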
Example #14
def main():
    global args, best_mIoU
    args = parser.parse_args()

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    if args.dataset == 'LaneDet':
        num_class = 20
    else:
        raise ValueError('Unknown dataset ' + args.dataset)

    # get places
    places = fluid.cuda_places()

    with fluid.dygraph.guard():
        model = models.ERFNet(num_class, [576, 1024])
        input_mean = model.input_mean
        input_std = model.input_std

        if args.resume:
            print(("=> loading checkpoint '{}'".format(args.resume)))
            checkpoint, _ = fluid.load_dygraph(args.resume)
            model.load_dict(checkpoint)
            print("=> checkpoint loaded successfully")
        else:
            print(("=> loading checkpoint '{}'".format('trained/ERFNet_trained')))
            checkpoint, _ = fluid.load_dygraph('trained/ERFNet_trained')
            model.load_dict(checkpoint)
            print("=> default checkpoint loaded successfully")

        # Data loading code
        test_dataset = ds.LaneDataSet(
            dataset_path='datasets/PreliminaryData',
            data_list=args.val_list,
            transform=[
                lambda x: cv2.resize(x, (1024, 576)),
                lambda x: (x - np.asarray(input_mean)[None, None, :]) / np.array(input_std)[None, None, :],
            ]
        )

        test_loader = DataLoader(
            test_dataset,
            places=places[0],
            batch_size=1,
            shuffle=False,
            num_workers=args.workers,
            collate_fn=collate_fn
        )

        ### evaluate ###
        mIoU = validate(test_loader, model)
        # print('mIoU: {}'.format(mIoU))
    return
Example #15
    def test_main(self):
        places = [fluid.cpu_places(4)]
        if fluid.is_compiled_with_cuda():
            places.append(fluid.cuda_places())

        for p in places:
            for has_persistable in [False, True]:
                for use_split in [False, True]:
                    self.run_network(p,
                                     use_split=use_split,
                                     has_persistable=has_persistable)
Example #16
    def __init__(self,
                 num_classes=2,
                 use_bce_loss=False,
                 use_dice_loss=False,
                 class_weight=None,
                 ignore_index=255,
                 sync_bn=True):
        self.init_params = locals()
        if num_classes > 2 and (use_bce_loss or use_dice_loss):
            raise ValueError(
                "dice loss and bce loss is only applicable to binary classfication"
            )

        if class_weight is not None:
            if isinstance(class_weight, list):
                if len(class_weight) != num_classes:
                    raise ValueError(
                        "Length of class_weight should be equal to number of classes"
                    )
            elif isinstance(class_weight, str):
                if class_weight.lower() != 'dynamic':
                    raise ValueError(
                        "if class_weight is string, must be dynamic!")
            else:
                raise TypeError(
                    'Expect class_weight is a list or string but receive {}'.
                    format(type(class_weight)))

        self.num_classes = num_classes
        self.use_bce_loss = use_bce_loss
        self.use_dice_loss = use_dice_loss
        self.class_weight = class_weight
        self.ignore_index = ignore_index
        self.sync_bn = sync_bn

        self.labels = None
        self.env_info = get_environ_info()
        if self.env_info['place'] == 'cpu':
            self.places = fluid.cpu_places()
        else:
            self.places = fluid.cuda_places()
        self.exe = fluid.Executor(self.places[0])
        self.train_prog = None
        self.test_prog = None
        self.parallel_train_prog = None
        self.train_inputs = None
        self.test_inputs = None
        self.train_outputs = None
        self.test_outputs = None
        self.train_data_loader = None
        self.eval_metrics = None
        # Current model status
        self.status = 'Normal'
Example #17
    def get_data_run_places(args):
        """
        Determine the run places for the data layer (dataloader).
        :return: list of run places
        """
        USE_PARALLEL = args["use_parallel"]
        USE_GPU = args["use_gpu"]
        NUM_OF_DEVICE = args["num_of_device"]

        if USE_PARALLEL and NUM_OF_DEVICE > 1:
            if USE_GPU:
                # expose the first NUM_OF_DEVICE GPUs, e.g. "0,1,2,3"
                os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
                    str(i) for i in range(NUM_OF_DEVICE))
                places = fluid.cuda_places()
            else:
                places = fluid.cpu_places(NUM_OF_DEVICE)
        else:
            if USE_GPU:
                places = fluid.cuda_places(0)
            else:
                places = fluid.cpu_places(1)
        return places
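An illustrative call to the helper above; the dict keys mirror exactly what get_data_run_places reads:

run_places = get_data_run_places({
    "use_parallel": True,
    "use_gpu": False,
    "num_of_device": 4,
})  # -> a list of 4 CPU places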
Example #18
def cosine_decay(args):
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    step = int(
        math.ceil(float(args.total_images) / (args.batch_size * len(places))))
    learning_rate = fluid.layers.cosine_decay(learning_rate=args.lr,
                                              step_each_epoch=step,
                                              epochs=args.num_epochs)
    optimizer = fluid.optimizer.Momentum(
        learning_rate=learning_rate,
        momentum=args.momentum_rate,
        regularization=fluid.regularizer.L2Decay(args.l2_decay))
    return learning_rate, optimizer
Example #19
def piecewise_decay(args):
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
    step = int(
        math.ceil(float(args.total_images) / (args.batch_size * len(places))))
    bd = [step * e for e in args.step_epochs]
    lr = [args.lr * (0.1**i) for i in range(len(bd) + 1)]
    learning_rate = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
    optimizer = fluid.optimizer.Momentum(
        learning_rate=learning_rate,
        momentum=args.momentum_rate,
        regularization=fluid.regularizer.L2Decay(args.l2_decay))
    return learning_rate, optimizer
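Both helpers above derive steps-per-epoch the same way. A worked example with illustrative numbers (ImageNet-sized dataset, batch size 256, 4 visible GPUs):

import math

total_images, batch_size, num_places = 1281167, 256, 4
step = int(math.ceil(float(total_images) / (batch_size * num_places)))
print(step)  # 1252 iterations per epoch across all cards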
Example #20
def main(args):

    # construct the sample
    input, output, data_size = construct_sample(args)
    # construct the train program
    train_program = fluid.Program()
    startup_program = fluid.Program()

    with fluid.program_guard(train_program, startup_program):
        seq2seq_model = SeqModel(args.seq_num, args.batch_size,
                                 args.hidden_size)
        ret_dict = seq2seq_model.build_graph()
    val_program = train_program.clone()
    with fluid.program_guard(train_program, startup_program):
        optimizer = fluid.optimizer.Adam(args.lr)
        optimizer.minimize(ret_dict.loss)
    places = fluid.cuda_places() if args.use_cuda else fluid.cpu_places()

    train_loader = fluid.io.DataLoader.from_generator(
        feed_list=ret_dict.feed_list, capacity=3, iterable=True)
    train_loader.set_batch_generator(train_reader(input, output, data_size,
                                                  args.batch_size),
                                     places=places)

    exe = Executor(places[0])
    exe.run(startup_program)

    # train stage:use data_loader as reader
    for _ in range(args.epoch):
        for data in train_loader():
            results = exe.run(train_program,
                              feed=data,
                              fetch_list=ret_dict.fetch_list)
            print("train process loss:{}".format(results[0]))
    # save the model for inference
    with fluid.program_guard(train_program, startup_program):
        fluid.io.save_inference_model(dirname="./model", feeded_var_names=['feat', 'lod'], \
            target_vars=[ret_dict.last_predict], executor=exe, export_for_deployment=True)

    # val stage: use data_loader as reader
    val_loader = fluid.io.DataLoader.from_generator(
        feed_list=ret_dict.feed_list, capacity=3, iterable=True)
    val_loader.set_batch_generator(val_reader(input, output, data_size,
                                              output.shape[0]),
                                   places=places)
    for _ in range(1):
        for data in val_loader():
            results = exe.run(val_program,
                              feed=data,
                              fetch_list=ret_dict.fetch_list)
            print("val process loss:{}".format(results[0]))
Example #21
    def prepare_places(self, with_data_parallel, with_cpu=True, with_gpu=True):
        places = []
        if with_cpu:
            places.append([fluid.CPUPlace()])
            if with_data_parallel:
                places.append([fluid.CPUPlace()] * 2)

        if with_gpu and fluid.core.is_compiled_with_cuda():
            tmp = fluid.cuda_places()
            assert len(tmp) > 0, "no gpu detected"
            if with_data_parallel:
                places.append(tmp)
            places.append([tmp[0]])
        return places
Example #22
    def start(self, places=None):
        if places is None:
            # resolve the default at call time; `places=F.cuda_places()` as a
            # default would be evaluated once at import and fail on CPU-only builds
            places = F.cuda_places()
        #assert self.pyreader is not None, 'use Dataset.features to build net first, then start dataset'
        def gen():
            try:
                for idx, i in enumerate(self.generator()):
                    yield i
            except Exception as e:
                log.exception(e)
                raise e

        r = F.io.PyReader(
            feed_list=self.placeholders(), capacity=50, iterable=True)
        r.decorate_batch_generator(gen, places=places)
        return r()
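A hypothetical usage sketch of start, assuming ds is a propeller Dataset whose placeholders already match a built network and exe is an Executor on the same place:

place = F.cuda_places()[0]
for batch in ds.start(places=[place]):
    exe.run(feed=batch)  # run the default program on one batch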
Example #23
    def check_multi_card_fetch_var(self):
        if self.is_invalid_test():
            return

        prog1, scope1, exe, loss1 = self.build_program_and_scope()
        scopes = []
        compiled_programs = []

        if self.use_cuda:
            places = fluid.cuda_places()
        else:
            places = fluid.cpu_places(self.device_count)

        for memory_optimize in [False, True]:
            for enable_inplace in [False, True]:
                prog, scope, _, loss = self.build_program_and_scope()
                scopes.append(scope)
                build_strategy = fluid.BuildStrategy()
                build_strategy.memory_optimize = memory_optimize
                build_strategy.enable_inplace = enable_inplace
                build_strategy.fuse_all_optimizer_ops = self.fuse_all_optimizer_ops
                compiled_program = fluid.CompiledProgram(
                    prog).with_data_parallel(
                        loss_name=loss.name,
                        build_strategy=build_strategy,
                        places=places)
                compiled_programs.append(compiled_program)

        repeated_var_names = self.get_all_vars(prog1) * 2
        random.shuffle(repeated_var_names)  # add some randomness

        for fetch_var in repeated_var_names:
            for _ in range(4):
                fetch_vals = []
                for scope, compiled_prog in zip(scopes, compiled_programs):
                    with fluid.scope_guard(scope):
                        fetch_val, = exe.run(compiled_prog,
                                             feed=feed_dict,
                                             fetch_list=[fetch_var])
                        fetch_vals.append(fetch_val)

                for item in fetch_vals:
                    self.assertTrue(
                        np.array_equal(fetch_vals[0], item),
                        "error var name: {}, fetch_vals[0]: {}, item: {}".
                        format(fetch_var,
                               fetch_vals[0][~np.equal(fetch_vals[0], item)],
                               item[~np.equal(fetch_vals[0], item)]))
Example #24
def main(args):
    input, output, data_size = construct_sample(args)
    places = fluid.cuda_places() if args.use_cuda else fluid.cpu_places()
    exe = Executor(places[0])
    [inference_program, feed_target_names,
     fetch_targets] = (fluid.io.load_inference_model(dirname="./model",
                                                     executor=exe))
    feat, lod = test_reader()
    result = exe.run(inference_program,
                     feed={
                         feed_target_names[0]: feat,
                         feed_target_names[1]: lod
                     },
                     fetch_list=fetch_targets)
    print(result[0].shape)
    output_final_result(result[0])
Example #25
    def __init__(self,
                 learning_rate,
                 momentum,
                 parameter_list=None,
                 regularization=None,
                 config=None,
                 **args):
        super(Momentum, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.parameter_list = parameter_list
        self.regularization = regularization
        self.multi_precision = config.get('multi_precision', False)
        self.rescale_grad = (
            1.0 / (config['TRAIN']['batch_size'] / len(fluid.cuda_places()))
            if config.get('use_pure_fp16', False) else 1.0)
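A worked example of the rescale_grad arithmetic above, with illustrative numbers; under pure fp16 the gradient is rescaled by the inverse of the per-card batch size:

batch_size, num_cards = 256, 8  # stand-ins for the config values
rescale_grad = 1.0 / (batch_size / num_cards)
print(rescale_grad)             # 0.03125 == 1 / 32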
Example #26
    def train(self, train_ds, train_hooks=[]):
        if not isinstance(train_ds, Dataset):
            raise ValueError(
                'expect dataset to be instance of Dataset, got %s' %
                repr(train_ds))

        train_program, model_spec, summary_record = self.build_for_train(
            train_ds)
        train_run_hooks = [
            hooks.StopAtStepHook(self.run_config.max_steps,
                                 self.run_config.run_steps),
            hooks.LoggingHook(model_spec.loss,
                              summary_record=summary_record,
                              summary_writer=get_summary_writer(
                                  os.path.join(self.run_config.model_dir,
                                               'train_history')),
                              per_step=self.run_config.log_steps,
                              skip_step=self.run_config.skip_steps),
        ]
        train_run_hooks.extend(train_hooks)
        train_executor = F.Executor(F.cuda_places()[0])

        mon_exe = MonitoredExecutor(train_executor,
                                    train_program,
                                    loss=model_spec.loss,
                                    run_config=self.run_config,
                                    run_hooks=train_run_hooks,
                                    warm_start_setting=self.warm_start_setting)

        distribution.init_distribuition_env(
            train_program)  # initialize the distributed training environment
        mon_exe.init_or_restore_variables()
        if distribution.status.is_master:
            mon_exe._hooks.append(
                hooks.CheckpointSaverHook(mon_exe._saver,
                                          per_step=mon_exe._save_steps,
                                          skip_step=mon_exe._skip_steps))

        try:
            with mon_exe:
                for data in train_ds.start():
                    mon_exe.run(feed=data)
        except (StopException, F.core.EOFException) as e:
            pass

        return mon_exe.result
Example #27
    def __init__(self, args):
        self.batch_size = args.batch_size
        self.lr = args.lr
        self.lr_strategy = args.lr_strategy
        self.l2_decay = args.l2_decay
        self.momentum_rate = args.momentum_rate
        self.step_epochs = args.step_epochs
        self.num_epochs = args.num_epochs
        self.warm_up_epochs = args.warm_up_epochs
        self.decay_epochs = args.decay_epochs
        self.decay_rate = args.decay_rate
        self.total_images = args.total_images
        self.multi_precision = args.multi_precision
        self.rescale_grad = (1.0 / (args.batch_size / len(fluid.cuda_places()))
                             if args.use_pure_fp16 else 1.0)

        self.step = int(math.ceil(float(self.total_images) / self.batch_size))
Example #28
    def test_analysis_helper(self):
        image = fluid.layers.data(name='image',
                                  shape=[1, 28, 28],
                                  dtype='float32')
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        model = MobileNet()
        out = model.net(input=image, class_dim=10)
        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
        optimizer = fluid.optimizer.Momentum(
            momentum=0.9,
            learning_rate=0.01,
            regularization=fluid.regularizer.L2Decay(4e-5))
        optimizer.minimize(avg_cost)
        main_prog = fluid.default_main_program()

        places = fluid.cuda_places() if fluid.is_compiled_with_cuda(
        ) else fluid.cpu_places()
        exe = fluid.Executor(places[0])
        train_reader = paddle.fluid.io.batch(paddle.dataset.mnist.train(),
                                             batch_size=64)
        train_loader = fluid.io.DataLoader.from_generator(
            feed_list=[image, label],
            capacity=512,
            use_double_buffer=True,
            iterable=True)
        train_loader.set_sample_list_generator(train_reader, places)
        exe.run(fluid.default_startup_program())

        vars = ['conv2d_0.tmp_0', 'fc_0.tmp_0', 'fc_0.tmp_1', 'fc_0.tmp_2']
        var_collector1 = VarCollector(main_prog, vars, use_ema=True)
        values = var_collector1.abs_max_run(train_loader,
                                            exe,
                                            step=None,
                                            loss_name=avg_cost.name)
        vars = [v.name for v in main_prog.list_vars() if v.persistable]
        var_collector2 = VarCollector(main_prog, vars, use_ema=False)
        values = var_collector2.run(train_loader,
                                    exe,
                                    step=None,
                                    loss_name=avg_cost.name)
        var_collector2.pdf(values)
Example #29
def main(args):
    """ main
    """
    import logging
    log.setLevel(logging.DEBUG)
    log.info("start")

    if args.dataset is not None:
        if args.dataset == "BlogCatalog":
            graph = data_loader.BlogCatalogDataset().graph
        else:
            raise ValueError(args.dataset + " dataset doesn't exist")
        log.info("Load buildin BlogCatalog dataset done.")
        node_feat = np.expand_dims(graph.node_feat["group_id"].argmax(-1),
                                   -1) + graph.num_nodes
        args.num_nodes = graph.num_nodes
        args.num_embedding = graph.num_nodes + graph.node_feat[
            "group_id"].shape[-1]
    else:
        graph = build_graph(args.num_nodes, args.edge_path, args.output_path)
        node_feat = np.load(args.node_feat_npy)

    model = GESModel(args.num_embedding, node_feat.shape[1] + 1,
                     args.hidden_size, args.neg_num, False, 2)
    pyreader = model.pyreader
    loss = model.forward()
    num_devices = len(F.cuda_places())

    train_steps = int(args.num_nodes * args.epoch / args.batch_size /
                      num_devices)
    log.info("Train steps: %s" % train_steps)
    optimization(args.lr * num_devices, loss, train_steps, args.optimizer)

    place = F.CUDAPlace(0)
    exe = F.Executor(place)
    exe.run(F.default_startup_program())

    gen_func = build_gen_func(args, graph, node_feat)

    pyreader.decorate_tensor_provider(gen_func)
    pyreader.start()
    train_prog = F.default_main_program()
    train_exe = get_parallel_exe(train_prog, loss)
    train(train_exe, exe, train_prog, loss, pyreader, args, train_steps)
Example #30
    def run_network(self, iterable, use_cuda, drop_last):
        x = fluid.data(shape=[None, 1], name='x', dtype='float32')
        places = fluid.cuda_places() if use_cuda else fluid.cpu_places(4)
        loader = fluid.io.DataLoader.from_generator(feed_list=[x],
                                                    capacity=16,
                                                    iterable=iterable,
                                                    drop_last=drop_last)
        y = fluid.layers.fc(x, size=10)
        loss = fluid.layers.reduce_mean(y)

        exe = fluid.Executor(places[0])
        exe.run(fluid.default_startup_program())

        prog = fluid.CompiledProgram(
            fluid.default_main_program()).with_data_parallel(
                places=places, loss_name=loss.name)

        loader.set_batch_generator(self.create_reader(),
                                   places=places if iterable else None)

        for _ in six.moves.range(self.epoch_num):
            actual_batch_num = 0
            if loader.iterable:
                for feed_data in loader():
                    x_data, = exe.run(prog, feed=feed_data, fetch_list=[x])
                    self.assertEqual(x_data.shape[0] % self.batch_size, 0)
                    self.assertTrue(x_data.shape[0] != 0)
                    actual_batch_num += int(x_data.shape[0] / self.batch_size)
            else:
                loader.start()
                try:
                    while True:
                        x_data, = exe.run(prog, fetch_list=[x])
                        self.assertEqual(x_data.shape[0] % self.batch_size, 0)
                        self.assertTrue(x_data.shape[0] != 0)
                        actual_batch_num += int(x_data.shape[0] /
                                                self.batch_size)
                except fluid.core.EOFException:
                    loader.reset()

            if not drop_last or len(places) == 1:
                self.assertEqual(self.batch_num, actual_batch_num)
            else:
                self.assertGreater(self.batch_num, actual_batch_num)