Ejemplo n.º 1
0
def main(args):
    """Classify one image with a pretrained PoseNet and print stage timings.

    Reads ``args.model_path`` (checkpoint passed to ``flow.load``) and
    ``args.image_path`` (image passed to ``load_image``). Prints the time
    spent building the model, loading its parameters and running inference,
    followed by the highest predicted probability and its class label.
    """
    flow.env.init()
    flow.enable_eager_execution()

    # Stage 1: build the network.
    tic = time.time()
    posenet_module = PoseNet()
    toc = time.time()
    print("init time : {}".format(toc - tic))

    # Stage 2: restore the pretrained parameters.
    tic = time.time()
    state = flow.load(args.model_path)
    posenet_module.load_state_dict(state)
    toc = time.time()
    print("load params time : {}".format(toc - tic))

    posenet_module.eval()
    posenet_module.to("cuda")

    # Stage 3: inference. The timed span includes loading the image, creating
    # the CUDA tensor and converting the result back to NumPy.
    tic = time.time()
    pixels = load_image(args.image_path)
    batch = flow.Tensor(pixels, device=flow.device("cuda"))
    probs = posenet_module(batch).softmax().numpy()
    toc = time.time()
    print("infer time : {}".format(toc - tic))

    best = np.argmax(probs)
    print("predict prob: %f, class name: %s" %
          (np.max(probs), clsidx_2_labels[best]))
Ejemplo n.º 2
0
    def setUp(self):
        """Configure OneFlow for a test and start from a clean default session.

        When a node list is configured, the workers are initialised once,
        guarded by the global ``_unittest_worker_initilized`` flag. This uses
        the ``ONEFLOW_TEST_CTRL_PORT`` (required) and
        ``ONEFLOW_TEST_DATA_PORT`` (optional) environment variables.
        ``oneflow.env.init()`` is likewise called only once, guarded by
        ``_unittest_env_initilized``. Every call clears the default session
        and applies the eager-execution and typing-check switches.
        """
        global _unittest_env_initilized, _unittest_worker_initilized

        if has_node_list():
            assert node_size() > 1

            if not _unittest_worker_initilized:
                oneflow.env.machine(node_list())

                control_port = os.getenv("ONEFLOW_TEST_CTRL_PORT")
                assert control_port, "env var ONEFLOW_TEST_CTRL_PORT not set"
                oneflow.env.ctrl_port(int(control_port))

                data_port_value = os.getenv("ONEFLOW_TEST_DATA_PORT")
                if data_port_value:
                    oneflow.env.data_port(int(data_port_value))

                oneflow.deprecated.init_worker(scp_binary=True, use_uuid=True)
                # Register the matching worker teardown for interpreter exit.
                atexit.register(oneflow.deprecated.delete_worker)
                _unittest_worker_initilized = True

        log_directory = os.getenv("ONEFLOW_TEST_LOG_DIR")
        if log_directory:
            oneflow.env.log_dir(log_directory)

        if not _unittest_env_initilized:
            oneflow.env.init()
            _unittest_env_initilized = True

        oneflow.clear_default_session()
        oneflow.enable_eager_execution(eager_execution_enabled())
        oneflow.experimental.enable_typing_check(typing_check_enabled())
Ejemplo n.º 3
0
    def test_eager_multi_output(test_case):
        """An eager job returning a constant and a variable yields both values."""
        flow.clear_default_session()
        flow.enable_eager_execution()

        mirrored_config = flow.FunctionConfig()
        mirrored_config.default_logical_view(flow.scope.mirrored_view())

        @flow.global_function(function_config=mirrored_config)
        def foo_job():
            ones_blob = flow.constant(1, shape=(2, 5), dtype=flow.float)
            zeros_var = flow.get_variable(
                name="var",
                shape=(64, 4),
                dtype=flow.float,
                initializer=flow.zeros_initializer(),
            )
            return ones_blob, zeros_var

        const_out, var_out = foo_job().get()

        # The constant is read through numpy_list(), the variable through
        # numpy(), exactly as the two outputs are exposed by the job result.
        expected_const = np.ones(shape=(2, 5), dtype=np.single)
        const_matches = np.array_equal(expected_const,
                                       const_out.numpy_list()[0])
        test_case.assertTrue(const_matches)

        expected_var = np.zeros(shape=(64, 4), dtype=np.single)
        var_matches = np.array_equal(expected_var, var_out.numpy())
        test_case.assertTrue(var_matches)
Ejemplo n.º 4
0
    def test_lazy_input_output(test_case):
        """With eager execution off, the job returns its input plus a ones variable."""
        flow.clear_default_session()
        flow.enable_eager_execution(False)

        mirrored_config = flow.FunctionConfig()
        mirrored_config.default_logical_view(flow.scope.mirrored_view())

        @flow.global_function(function_config=mirrored_config)
        def foo_job(input_def: oft.Numpy.Placeholder(shape=(2, 5))):
            ones_var = flow.get_variable(
                name="var",
                shape=(2, 5),
                dtype=flow.float,
                initializer=flow.ones_initializer(),
            )
            # Both operands are cast to the current logical view before the add.
            casted_input = flow.cast_to_current_logical_view(input_def)
            casted_var = flow.cast_to_current_logical_view(ones_var)
            return casted_var + casted_input

        # CheckPoint().init() is called before the first run of the job
        # (presumably to initialise "var" — confirm against the OneFlow docs).
        flow.train.CheckPoint().init()

        feed = np.arange(10).reshape(2, 5).astype(np.single)
        result = foo_job(feed).get()
        expected = feed + np.ones(shape=(2, 5), dtype=np.single)
        test_case.assertTrue(np.array_equal(expected, result.numpy()))
Ejemplo n.º 5
0
def test_eager_input_fixed(test_case):
    """Adding a constant 1.0 to a fixed-shape Numpy placeholder gives x + 1."""
    flow.clear_default_session()
    flow.enable_eager_execution()

    mirrored_config = flow.FunctionConfig()
    mirrored_config.default_logical_view(flow.scope.mirrored_view())

    feed = np.arange(10).astype(np.single)
    expected = feed + 1.0

    @flow.global_function(function_config=mirrored_config)
    def foo_job(x_def: oft.Numpy.Placeholder(shape=(10, ), dtype=flow.float)):
        one = flow.constant(1.0, shape=(1, ), dtype=flow.float)
        shifted = x_def + one
        # The job returns nothing; the result is checked inside the job body.
        test_case.assertTrue(np.allclose(shifted.numpy(0), expected))

    foo_job(feed)
Ejemplo n.º 6
0
    def test_eager_output(test_case):
        """An eager job returning a constant blob of ones yields a (2, 5) ones array."""
        flow.clear_default_session()
        flow.enable_eager_execution()

        mirrored_config = flow.FunctionConfig()
        mirrored_config.default_logical_view(flow.scope.mirrored_view())

        @flow.global_function(function_config=mirrored_config)
        def foo_job():
            return flow.constant(1, shape=(2, 5), dtype=flow.float)

        result = foo_job().get()
        expected = np.ones(shape=(2, 5), dtype=np.single)
        # Compare against the first entry of the result's numpy_list().
        test_case.assertTrue(np.array_equal(expected, result.numpy_list()[0]))
def test_eager_tensor_list_input(test_case):
    """Splitting a two-tensor list input gives back the two squeezed inputs."""
    flow.clear_default_session()
    flow.enable_eager_execution()

    first = np.random.rand(1, 5, 4).astype(np.single)
    second = np.random.rand(1, 4, 4).astype(np.single)

    mirrored_config = flow.FunctionConfig()
    mirrored_config.default_logical_view(flow.scope.mirrored_view())

    @flow.global_function(mirrored_config)
    def foo_job(
        input_def: oft.ListListNumpy.Placeholder(shape=(2, 5, 4), dtype=flow.float)
    ):
        piece_0, piece_1 = flow.tensor_list_split(input_def)
        # Each piece is compared with its source array minus the leading
        # length-1 axis.
        first_matches = np.array_equal(piece_0.numpy(), first.squeeze())
        test_case.assertTrue(first_matches)
        second_matches = np.array_equal(piece_1.numpy(), second.squeeze())
        test_case.assertTrue(second_matches)

    foo_job([[first, second]])
Ejemplo n.º 8
0
    def test_eager_input(test_case):
        """ReLU of a ListNumpy placeholder matches ``np.maximum(x, 0)``."""
        flow.clear_default_session()
        flow.enable_eager_execution()

        mirrored_config = flow.FunctionConfig()
        mirrored_config.default_logical_view(flow.scope.mirrored_view())

        sample = np.random.rand(2, 5).astype(np.single)
        expected = np.maximum(sample, 0)

        @flow.global_function(function_config=mirrored_config)
        def foo_job(x_def: oft.ListNumpy.Placeholder(shape=(2, 5),
                                                     dtype=flow.float)):
            activated = flow.math.relu(x_def)
            # The job returns nothing; the result is checked inside the job body.
            test_case.assertTrue(np.allclose(activated.numpy(0), expected))

        foo_job([sample])
Ejemplo n.º 9
0
    def test_eager_input_output(test_case):
        """A job that doubles its ListNumpy input returns ``input * 2``."""
        flow.clear_default_session()
        flow.enable_eager_execution()

        mirrored_config = flow.FunctionConfig()
        mirrored_config.default_logical_view(flow.scope.mirrored_view())

        sample = np.random.rand(5, 4).astype(np.single)
        expected = sample * 2.0

        @flow.global_function(function_config=mirrored_config)
        def foo_job(x_def: oft.ListNumpy.Placeholder(shape=(5, 4),
                                                     dtype=flow.float)):
            two = flow.constant(2.0, shape=(1, ), dtype=flow.float)
            return x_def * two

        result = foo_job([sample]).get()
        doubled = result.numpy_list()[0]
        test_case.assertTrue(np.allclose(expected, doubled))
Ejemplo n.º 10
0
def test_eager_multi_input(test_case):
    """Multiplying two ListNumpy inputs matches the NumPy product."""
    flow.clear_default_session()
    flow.enable_eager_execution()

    mirrored_config = flow.FunctionConfig()
    mirrored_config.default_logical_view(flow.scope.mirrored_view())

    matrix = np.random.rand(3, 4).astype(np.single)
    factor = np.array([2]).astype(np.single)
    expected = matrix * factor

    @flow.global_function(function_config=mirrored_config)
    def foo_job(
            x_def: oft.ListNumpy.Placeholder(shape=(3, 4), dtype=flow.float),
            y_def: oft.ListNumpy.Placeholder(shape=(1, ), dtype=flow.float),
    ):
        product = x_def * y_def
        # The job returns nothing; the result is checked inside the job body.
        test_case.assertTrue(np.allclose(product.numpy(0), expected))

    foo_job([matrix], [factor])
Ejemplo n.º 11
0
def test_2d_gpu_variable(test_case):
    """Run a train-type job twice with a variable placed on "gpu" devices "0:0-1".

    The job creates a zero-initialised variable "w", prints its value and
    minimises it with an SGD optimizer at a constant learning rate of 0.1.
    """
    flow.enable_eager_execution()
    flow.config.gpu_device_num(2)
    gpu_pair = "0:0-1"
    train_config = flow.FunctionConfig()

    @flow.global_function(type="train", function_config=train_config)
    def Foo():
        with flow.scope.placement("gpu", gpu_pair):
            weight = flow.get_variable(
                "w",
                shape=(10, ),
                dtype=flow.float,
                initializer=flow.constant_initializer(0),
            )
            print(weight.numpy(0))
        schedule = flow.optimizer.PiecewiseConstantScheduler([], [0.1])
        flow.optimizer.SGD(schedule, momentum=0).minimize(weight)

    # Invoke the job twice in a row.
    for _ in range(2):
        Foo()
Ejemplo n.º 12
0
def test_2d_gpu_variable(test_case):
    """Run a training job twice with a variable placed on "gpu" devices "0:0-1".

    Training is configured on the FunctionConfig (``naive_conf`` model update,
    primary learning rate 0.1). The job creates a zero-initialised variable
    "w", prints its value and registers it as the loss.
    """
    flow.enable_eager_execution()
    flow.config.gpu_device_num(2)

    train_config = flow.FunctionConfig()
    train_config.train.model_update_conf({"naive_conf": {}})
    train_config.train.primary_lr(0.1)
    gpu_pair = "0:0-1"

    @flow.global_function(train_config)
    def Foo():
        with flow.scope.placement("gpu", gpu_pair):
            weight = flow.get_variable(
                "w",
                shape=(10, ),
                dtype=flow.float,
                initializer=flow.constant_initializer(0),
            )
            print(weight.numpy(0))
        flow.losses.add_loss(weight)

    # Invoke the job twice in a row.
    for _ in range(2):
        Foo()
Ejemplo n.º 13
0
def main(args):
    """Train and validate a ResNet-50 classifier, saving a checkpoint per epoch.

    Attributes read from ``args``: ``ofrecord_path``, ``train_batch_size``,
    ``val_batch_size``, ``load_checkpoint`` (an empty string means no
    checkpoint is loaded), ``learning_rate``, ``mom``, ``epochs`` and
    ``save_checkpoint_path``.

    After every epoch the model is evaluated on the validation loader and its
    state dict is saved under ``args.save_checkpoint_path``. The losses sampled
    every ``print_interval`` training iterations are written to
    ``of_losses.txt`` once all epochs have finished.
    """
    flow.enable_eager_execution()

    train_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="train",
        # NOTE(Liang Depeng): needs to explicitly set the dataset size
        dataset_size=7795,
        batch_size=args.train_batch_size,
    )

    val_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="val",
        dataset_size=7800,
        batch_size=args.val_batch_size,
    )

    # oneflow init
    start_t = time.time()
    res50_module = resnet50(num_classes=8)
    if args.load_checkpoint != "":
        res50_module.load_state_dict(flow.load(args.load_checkpoint))
    end_t = time.time()
    print("init time : {}".format(end_t - start_t))

    # NOTE(review): this criterion is created and moved to the GPU but the
    # training loop below computes its loss with ``scloss`` instead — confirm
    # whether it is still needed.
    of_cross_entropy = flow.nn.CrossEntropyLoss()

    res50_module.to("cuda")
    of_cross_entropy.to("cuda")

    of_sgd = flow.optim.SGD(res50_module.parameters(),
                            lr=args.learning_rate,
                            momentum=args.mom)

    of_losses = []
    all_samples = len(val_data_loader) * args.val_batch_size
    print_interval = 20

    for epoch in range(args.epochs):
        res50_module.train()

        for b in range(len(train_data_loader)):
            image, label = train_data_loader.get_batch()
            # oneflow train
            start_t = time.time()
            image = image.to("cuda")
            label = label.to("cuda")
            logits, body = res50_module(image)
            loss = scloss(label).to("cuda")(body, logits)
            loss.backward()
            of_sgd.step()
            of_sgd.zero_grad()
            end_t = time.time()
            if b % print_interval == 0:
                loss_value = loss.numpy()
                of_losses.append(loss_value)
                print(
                    "epoch {} train iter {} oneflow loss {}, train time : {}".
                    format(epoch, b, loss_value, end_t - start_t))
        print("epoch %d train done, start validation" % epoch)

        res50_module.eval()
        correct_of = 0.0
        for _ in range(len(val_data_loader)):
            image, label = val_data_loader.get_batch()
            image = image.to("cuda")
            with flow.no_grad():
                logits, _body = res50_module(image)
                predictions = logits.softmax()
            of_predictions = predictions.numpy()
            clsidxs = np.argmax(of_predictions, axis=1)

            label_nd = label.numpy()
            for i in range(args.val_batch_size):
                if clsidxs[i] == label_nd[i]:
                    correct_of += 1

        # Compute the accuracy once; it is both printed and embedded in the
        # checkpoint directory name.
        val_acc = correct_of / all_samples
        print("epoch %d, oneflow top1 val acc: %f" % (epoch, val_acc))
        flow.save(
            res50_module.state_dict(),
            os.path.join(
                args.save_checkpoint_path,
                "epoch_%d_val_acc_%f" % (epoch, val_acc),
            ),
        )

    # The context manager closes the file even if a write raises.
    with open("of_losses.txt", "w") as writer:
        for o in of_losses:
            writer.write("%f\n" % o)
Ejemplo n.º 14
0
 def setUp(self):
     """Clear OneFlow's default session and enable eager execution."""
     flow.clear_default_session()
     flow.enable_eager_execution(True)
Ejemplo n.º 15
0
    def setUp(self):
        """Configure OneFlow for a test and start from a clean default session.

        When a node list is configured, the workers are initialised once,
        guarded by the global ``_unittest_worker_initilized`` flag. They are
        set up either from the host list or from bootstrap configurations,
        depending on ``enable_init_by_host_list()``. A matching teardown is
        registered with ``atexit``.

        Environment variables read:
            ONEFLOW_TEST_MASTER_PORT: required when workers are initialised.
            ONEFLOW_TEST_SSH_PORT: required when workers are initialised.
            ONEFLOW_TEST_CTRL_PORT: optional, bootstrap path only (default -1).
            ONEFLOW_TEST_DATA_PORT: optional.
            ONEFLOW_TEST_LOG_DIR: optional.

        Raises:
            AssertionError: if fewer than two nodes are configured, or a
                required environment variable is missing.
        """
        global _unittest_env_initilized
        global _unittest_worker_initilized
        if has_node_list():
            assert node_size() > 1
            if not _unittest_worker_initilized:
                master_port = os.getenv("ONEFLOW_TEST_MASTER_PORT")
                assert master_port, "env var ONEFLOW_TEST_MASTER_PORT not set"
                # Check the ssh port up front: otherwise an unset variable
                # only surfaces later as a TypeError from int(None), after the
                # environment has already been partly configured.
                ssh_port = os.getenv("ONEFLOW_TEST_SSH_PORT")
                assert ssh_port, "env var ONEFLOW_TEST_SSH_PORT not set"
                oneflow.env.ctrl_port(int(master_port))

                # Arguments shared by both initialisation paths.
                init_worker_kwargs = {
                    "scp_binary": True,
                    "use_uuid": True,
                    "ssh_port": int(ssh_port),
                }
                if enable_init_by_host_list():
                    oneflow.env.machine(node_list())
                    delete_worker = oneflow.deprecated.delete_worker
                else:
                    ctrl_port = os.getenv("ONEFLOW_TEST_CTRL_PORT")
                    config_rank_ctrl_port = int(ctrl_port) if ctrl_port else -1
                    config_world_size = world_size() if has_world_size() else 0

                    init_worker_kwargs["bootstrap_conf_list"] = (
                        oneflow.env.init_bootstrap_confs(
                            node_list(),
                            int(master_port),
                            config_world_size,
                            config_rank_ctrl_port,
                        ))
                    delete_worker = (
                        oneflow.deprecated.delete_worker_by_bootstrap)

                data_port = os.getenv("ONEFLOW_TEST_DATA_PORT")
                if data_port:
                    oneflow.env.data_port(int(data_port))

                print("initializing worker...")
                oneflow.deprecated.init_worker(**init_worker_kwargs)
                # The teardown receives the raw environment string, as before.
                atexit.register(delete_worker, ssh_port=ssh_port)
                _unittest_worker_initilized = True

        log_dir = os.getenv("ONEFLOW_TEST_LOG_DIR")
        if log_dir:
            oneflow.env.log_dir(log_dir)

        if not _unittest_env_initilized:
            oneflow.env.init()
            _unittest_env_initilized = True

        oneflow.clear_default_session()
        oneflow.enable_eager_execution(eager_execution_enabled())
        oneflow.experimental.enable_typing_check(typing_check_enabled())
Ejemplo n.º 16
0
 def setUp(self):
     """Run the parent class's ``setUp``, then enable eager execution."""
     super().setUp()
     flow.enable_eager_execution(True)
# eager_mlp_mnist.py
import oneflow as flow
import oneflow.typing as tp
# Enable eager execution at import time, before any job function is defined.
flow.enable_eager_execution(True)
# Leading dimension of the image and label placeholders declared below.
BATCH_SIZE = 100


def main(images, labels):
    """Define ``train_job``, a two-layer dense classifier trained with SGD.

    NOTE(review): in the visible code ``train_job`` is only defined, never
    called, and the ``images``/``labels`` arguments of ``main`` are unused
    (the nested job declares its own parameters with the same names). The
    snippet may be truncated — confirm against the full ``eager_mlp_mnist.py``.
    """
    @flow.global_function(type="train")
    def train_job(images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28),
                                               dtype=flow.float),
                  labels: tp.Numpy.Placeholder((BATCH_SIZE, ),
                                               dtype=flow.int32)) -> tp.Numpy:
        # NOTE(review): the signature is annotated as returning tp.Numpy, but
        # no return statement is visible in this block — presumably
        # ``return loss`` was lost; verify against the original source.
        with flow.scope.placement("cpu", "0:0"):
            initializer = flow.truncated_normal(0.1)
            # Flatten everything after the leading dimension for the dense layers.
            reshape = flow.reshape(images, [images.shape[0], -1])
            hidden = flow.layers.dense(reshape,
                                       512,
                                       activation=flow.nn.relu,
                                       kernel_initializer=initializer,
                                       name="dense1")
            logits = flow.layers.dense(hidden,
                                       10,
                                       kernel_initializer=initializer,
                                       name="dense2")
            loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
                labels, logits)

        # SGD without momentum; the scheduler is given the single value 0.1.
        lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.1])
        flow.optimizer.SGD(lr_scheduler, momentum=0).minimize(loss)
Ejemplo n.º 18
0
 def setUp(self):
     """Clear OneFlow's default session and call ``enable_eager_execution(False)``."""
     oneflow.clear_default_session()
     oneflow.enable_eager_execution(False)
Ejemplo n.º 19
0
    def setUp(self):
        """Configure OneFlow for a test and start from a clean default session.

        Three situations are handled before the common tail:

        * A node list is configured: the workers are initialised once,
          guarded by the global ``_unittest_worker_initilized`` flag. They are
          set up either from the host list or from bootstrap configurations,
          depending on ``enable_init_by_host_list()``.
        * No node list, but several devices and multi-process mode: one
          ``python3 -m oneflow --start_worker`` process is launched per rank
          ``1 .. device_num() - 1``, each with its own env proto file and log
          directory under ``~/oneflow_temp/<uuid>``.
        * Otherwise nothing extra is set up.

        Environment variables read:
            ONEFLOW_TEST_MASTER_PORT: required when workers are initialised.
            ONEFLOW_TEST_SSH_PORT: required when workers are initialised.
            ONEFLOW_TEST_CTRL_PORT: optional, bootstrap path only (default -1).
            ONEFLOW_TEST_NODE_SIZE: optional, bootstrap path only (default -1).
            ONEFLOW_TEST_DATA_PORT: optional.
            ONEFLOW_TEST_LOG_DIR: optional.

        Raises:
            AssertionError: if fewer than two nodes are configured, a required
                environment variable is missing, or the default env proto
                does not have the expected shape in multi-process mode.
        """
        global _unittest_env_initilized
        global _unittest_worker_initilized
        if has_node_list():
            assert node_size() > 1
            if not _unittest_worker_initilized:
                master_port = os.getenv("ONEFLOW_TEST_MASTER_PORT")
                assert master_port, "env var ONEFLOW_TEST_MASTER_PORT not set"
                # Check the ssh port up front: otherwise an unset variable
                # only surfaces later as a TypeError from int(None), after the
                # environment has already been partly configured.
                ssh_port = os.getenv("ONEFLOW_TEST_SSH_PORT")
                assert ssh_port, "env var ONEFLOW_TEST_SSH_PORT not set"
                oneflow.env.ctrl_port(int(master_port))

                # Arguments shared by both initialisation paths.
                init_worker_kwargs = {
                    "scp_binary": True,
                    "use_uuid": True,
                    "ssh_port": int(ssh_port),
                }
                if enable_init_by_host_list():
                    oneflow.env.machine(node_list())
                    delete_worker = oneflow.deprecated.delete_worker
                else:
                    ctrl_port = os.getenv("ONEFLOW_TEST_CTRL_PORT")
                    config_rank_ctrl_port = int(ctrl_port) if ctrl_port else -1
                    config_world_size = world_size() if has_world_size() else 0
                    env_node_size = os.getenv("ONEFLOW_TEST_NODE_SIZE")
                    config_node_size = (int(env_node_size)
                                        if env_node_size else -1)

                    init_worker_kwargs["bootstrap_conf_list"] = (
                        oneflow.env.init_bootstrap_confs(
                            node_list(),
                            int(master_port),
                            config_world_size,
                            config_rank_ctrl_port,
                            config_node_size,
                        ))
                    delete_worker = (
                        oneflow.deprecated.delete_worker_by_bootstrap)

                data_port = os.getenv("ONEFLOW_TEST_DATA_PORT")
                if data_port:
                    oneflow.env.data_port(int(data_port))

                print("initializing worker...")
                oneflow.deprecated.init_worker(**init_worker_kwargs)
                # The teardown receives the raw environment string, as before.
                atexit.register(delete_worker, ssh_port=ssh_port)
                _unittest_worker_initilized = True
        elif device_num() > 1 and enable_multi_process():
            # NOTE(review): unlike the branch above, this one is not guarded
            # by _unittest_worker_initilized, so worker processes are started
            # on every call — confirm that this is intended.
            master_port = find_free_port()
            oneflow.env.ctrl_port(master_port)
            config_world_size = device_num()
            # The returned list is not needed here; the call is kept for its
            # effect on the default env proto checked just below (presumably
            # it fills in ctrl_bootstrap_conf — verify).
            oneflow.env.init_bootstrap_confs(
                ["127.0.0.1"],
                master_port,
                config_world_size,
                num_process_per_node=device_num(),
            )
            env_proto = env_util.default_env_proto
            assert (len(env_proto.machine) == 1
                    and env_proto.HasField("ctrl_bootstrap_conf"))

            # expanduser("~") also works when HOME is unset, where
            # os.getenv("HOME") + "..." would raise a TypeError.
            run_dir = os.path.abspath(
                os.path.join(os.path.expanduser("~"), "oneflow_temp",
                             str(uuid.uuid1())))
            os.makedirs(run_dir, exist_ok=True)

            for rank in range(1, config_world_size):
                worker_log_dir = os.path.join(run_dir, "log_" + str(rank))
                os.makedirs(worker_log_dir, exist_ok=True)

                worker_env_proto = EnvProto()
                worker_env_proto.CopyFrom(env_proto)
                worker_env_proto.ctrl_bootstrap_conf.rank = rank
                worker_env_proto.cpp_logging_conf.log_dir = worker_log_dir

                # Write the proto straight to its final location instead of
                # going through a temp file and a shell "cp". Only the
                # Python 3 encoding path is kept, because subprocess.DEVNULL
                # and the "python3" command below already require Python 3.
                proto_path = os.path.join(
                    worker_log_dir, "env_proto_" + str(rank) + ".proto")
                with open(proto_path, "wb") as proto_file:
                    proto_file.write(
                        pbtxt.MessageToString(worker_env_proto).encode())

                # An argument list (no shell) keeps paths containing spaces or
                # shell metacharacters intact.
                subprocess.Popen(
                    [
                        "python3", "-m", "oneflow", "--start_worker",
                        "--env_proto=" + proto_path
                    ],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                )
            atexit.register(oneflow.deprecated.delete_worker_of_multi_process,
                            run_dir=run_dir)

        log_dir = os.getenv("ONEFLOW_TEST_LOG_DIR")
        if log_dir:
            oneflow.env.log_dir(log_dir)

        if not _unittest_env_initilized:
            oneflow.env.init()
            _unittest_env_initilized = True

        oneflow.clear_default_session()
        oneflow.enable_eager_execution(eager_execution_enabled())
        oneflow.experimental.enable_typing_check(typing_check_enabled())