예제 #1
0
    def test_dedup_input(self):
        os.environ['CGO'] = 'true'
        lp, models = self._build_logical_with_mnist(3)
        opt = DedupInputOptimizer()
        opt.convert(lp)
        with open('dedup_logical_graph.json', 'r') as fp:
            correct_dump = fp.readlines()
        lp_dump = lp.logical_graph._dump()

        self.assertTrue(correct_dump[0] == json.dumps(lp_dump))

        advisor = RetiariiAdvisor()
        cgo = CGOExecutionEngine()

        phy_models = cgo._assemble(lp)
        self.assertTrue(len(phy_models) == 1)
        # logging.info(phy_models[0][0]._dump())
        # script=model_to_pytorch_script(phy_models[0][0], placement = phy_models[0][1])
        # logging.info(script)
        # with open('generated/debug_dedup_input.py', 'w') as fp:
        #     fp.write(script)
        # sys.path.insert(0, 'generated')
        # multi_model = import_('debug_dedup_input.logical_0')
        # trainer = PyTorchMultiModelTrainer(
        #     multi_model(), phy_models[0][0].training_config.kwargs
        # )
        # trainer.fit()

        advisor.stopping = True
        advisor.default_worker.join()
        advisor.assessor_worker.join()
예제 #2
0
    def test_submit_models(self):
        _reset()
        nni.retiarii.debug_configs.framework = 'pytorch'
        os.makedirs('generated', exist_ok=True)
        import nni.runtime.platform.test as tt
        protocol._set_out_file(
            open('generated/debug_protocol_out_file.py', 'wb'))
        protocol._set_in_file(
            open('generated/debug_protocol_out_file.py', 'rb'))

        models = _load_mnist(2)

        advisor = RetiariiAdvisor('ws://_unittest_placeholder_')
        advisor._channel = protocol.LegacyCommandChannel()
        advisor.default_worker.start()
        advisor.assessor_worker.start()

        remote = RemoteConfig(machine_list=[])
        remote.machine_list.append(
            RemoteMachineConfig(host='test', gpu_indices=[0, 1, 2, 3]))
        cgo_engine = CGOExecutionEngine(training_service=remote,
                                        batch_waiting_time=0)
        set_execution_engine(cgo_engine)
        submit_models(*models)
        time.sleep(3)

        if torch.cuda.is_available() and torch.cuda.device_count() >= 2:
            cmd, data = protocol.receive()
            params = nni.load(data)

            tt.init_params(params)

            trial_thread = threading.Thread(
                target=CGOExecutionEngine.trial_execute_graph)
            trial_thread.start()
            last_metric = None
            while True:
                time.sleep(1)
                if tt._last_metric:
                    metric = tt.get_last_metric()
                    if metric == last_metric:
                        continue
                    if 'value' in metric:
                        metric['value'] = json.dumps(metric['value'])
                    advisor.handle_report_metric_data(metric)
                    last_metric = metric
                if not trial_thread.is_alive():
                    trial_thread.join()
                    break

            trial_thread.join()

        advisor.stopping = True
        advisor.default_worker.join()
        advisor.assessor_worker.join()
        cgo_engine.join()
예제 #3
0
    def test_submit_models(self):
        _reset()
        nni.retiarii.debug_configs.framework = 'pytorch'
        os.makedirs('generated', exist_ok=True)
        from nni.runtime import protocol
        import nni.runtime.platform.test as tt
        protocol._set_out_file(
            open('generated/debug_protocol_out_file.py', 'wb'))
        protocol._set_in_file(
            open('generated/debug_protocol_out_file.py', 'rb'))

        models = _load_mnist(2)

        advisor = RetiariiAdvisor()
        cgo_engine = CGOExecutionEngine(devices=[
            GPUDevice("test", 0),
            GPUDevice("test", 1),
            GPUDevice("test", 2),
            GPUDevice("test", 3)
        ],
                                        batch_waiting_time=0)
        set_execution_engine(cgo_engine)
        submit_models(*models)
        time.sleep(3)

        if torch.cuda.is_available() and torch.cuda.device_count() >= 2:
            cmd, data = protocol.receive()
            params = nni.load(data)

            tt.init_params(params)

            trial_thread = threading.Thread(
                target=CGOExecutionEngine.trial_execute_graph)
            trial_thread.start()
            last_metric = None
            while True:
                time.sleep(1)
                if tt._last_metric:
                    metric = tt.get_last_metric()
                    if metric == last_metric:
                        continue
                    if 'value' in metric:
                        metric['value'] = json.dumps(metric['value'])
                    advisor.handle_report_metric_data(metric)
                    last_metric = metric
                if not trial_thread.is_alive():
                    trial_thread.join()
                    break

            trial_thread.join()

        advisor.stopping = True
        advisor.default_worker.join()
        advisor.assessor_worker.join()
        cgo_engine.join()
예제 #4
0
    def test_dedup_input_two_devices(self):
        _reset()

        lp, models = self._build_logical_with_mnist(3)

        opt = DedupInputOptimizer()
        opt.convert(lp)

        advisor = RetiariiAdvisor()
        available_devices = [GPUDevice("test", 0), GPUDevice("test", 1)]
        cgo = CGOExecutionEngine(devices=available_devices,
                                 batch_waiting_time=0)

        phy_models = cgo._assemble(lp)
        self.assertTrue(len(phy_models) == 2)
        advisor.stopping = True
        advisor.default_worker.join()
        advisor.assessor_worker.join()
        cgo.join()
예제 #5
0
    def test_dedup_input_two_devices(self):
        _reset()

        lp, models = self._build_logical_with_mnist(3)

        opt = DedupInputOptimizer()
        opt.convert(lp)

        advisor = RetiariiAdvisor('ws://_unittest_placeholder_')
        advisor._channel = protocol.LegacyCommandChannel()
        advisor.default_worker.start()
        advisor.assessor_worker.start()

        remote = RemoteConfig(machine_list=[])
        remote.machine_list.append(
            RemoteMachineConfig(host='test', gpu_indices=[0, 1]))
        cgo = CGOExecutionEngine(training_service=remote, batch_waiting_time=0)

        phy_models = cgo._assemble(lp)
        self.assertTrue(len(phy_models) == 2)
        advisor.stopping = True
        advisor.default_worker.join()
        advisor.assessor_worker.join()
        cgo.join()
예제 #6
0
    def test_submit_models(self):
        os.environ['CGO'] = 'true'
        os.makedirs('generated', exist_ok=True)
        from nni.runtime import protocol, platform
        import nni.runtime.platform.test as tt
        protocol._out_file = open('generated/debug_protocol_out_file.py', 'wb')
        protocol._in_file = open('generated/debug_protocol_out_file.py', 'rb')

        models = _load_mnist(2)
        advisor = RetiariiAdvisor()
        submit_models(*models)

        if torch.cuda.is_available() and torch.cuda.device_count() >= 2:
            cmd, data = protocol.receive()
            params = json.loads(data)
            params['parameters']['training_kwargs']['max_steps'] = 100

            tt.init_params(params)

            trial_thread = threading.Thread(
                target=CGOExecutionEngine.trial_execute_graph())
            trial_thread.start()
            last_metric = None
            while True:
                time.sleep(1)
                if tt._last_metric:
                    metric = tt.get_last_metric()
                    if metric == last_metric:
                        continue
                    advisor.handle_report_metric_data(metric)
                    last_metric = metric
                if not trial_thread.is_alive():
                    break

            trial_thread.join()
        advisor.stopping = True
        advisor.default_worker.join()
        advisor.assessor_worker.join()