def test_submit_models(self):
    """Submit models through a CGO execution engine and drive one trial.

    The models returned by ``_load_mnist(2)`` are submitted to a
    ``CGOExecutionEngine`` configured with a single remote machine exposing
    GPU indices 0-3. When CUDA is available with at least two devices, one
    command is read back from the protocol, its payload is installed as the
    trial parameters, ``CGOExecutionEngine.trial_execute_graph`` is run in a
    background thread, and every new metric is forwarded to the advisor
    until that thread exits.

    The advisor workers and the engine are always stopped and joined, even
    when the test body raises, so a failure does not leave threads running.
    """
    _reset()
    nni.retiarii.debug_configs.framework = 'pytorch'
    os.makedirs('generated', exist_ok=True)
    import nni.runtime.platform.test as tt

    # The same path backs both the outgoing and the incoming side of the
    # protocol; presumably so that what is sent can be read back with
    # `protocol.receive()` below.
    # NOTE(review): both handles are handed to `protocol` and are not closed
    # here -- confirm who owns them before adding an explicit close.
    protocol._set_out_file(
        open('generated/debug_protocol_out_file.py', 'wb'))
    protocol._set_in_file(
        open('generated/debug_protocol_out_file.py', 'rb'))

    models = _load_mnist(2)

    advisor = RetiariiAdvisor('ws://_unittest_placeholder_')
    advisor._channel = protocol.LegacyCommandChannel()
    advisor.default_worker.start()
    advisor.assessor_worker.start()

    cgo_engine = None
    try:
        remote = RemoteConfig(machine_list=[])
        remote.machine_list.append(
            RemoteMachineConfig(host='test', gpu_indices=[0, 1, 2, 3]))
        cgo_engine = CGOExecutionEngine(training_service=remote,
                                        batch_waiting_time=0)
        set_execution_engine(cgo_engine)
        submit_models(*models)
        time.sleep(3)

        # The trial itself is only executed on hosts with >= 2 CUDA devices.
        if torch.cuda.is_available() and torch.cuda.device_count() >= 2:
            _, data = protocol.receive()
            params = nni.load(data)
            tt.init_params(params)

            trial_thread = threading.Thread(
                target=CGOExecutionEngine.trial_execute_graph)
            trial_thread.start()

            last_metric = None
            while True:
                time.sleep(1)
                if tt._last_metric:
                    metric = tt.get_last_metric()
                    # Only forward a metric that differs from the last one
                    # reported. The liveness check below must still run for
                    # an unchanged metric, otherwise the loop could never
                    # terminate once the trial thread has finished.
                    # NOTE(review): `last_metric` keeps the JSON-encoded
                    # 'value' while the fresh metric is compared before
                    # encoding -- confirm the de-duplication is effective.
                    if metric != last_metric:
                        if 'value' in metric:
                            metric['value'] = json.dumps(metric['value'])
                        advisor.handle_report_metric_data(metric)
                        last_metric = metric
                if not trial_thread.is_alive():
                    break

            trial_thread.join()
    finally:
        # Always shut the background workers down, including on failure.
        advisor.stopping = True
        advisor.default_worker.join()
        advisor.assessor_worker.join()
        if cgo_engine is not None:
            cgo_engine.join()
def test_dedup_input_two_devices(self):
    """Check that the optimized logical plan assembles into two models.

    A logical plan is built with ``self._build_logical_with_mnist(3)``,
    converted in place by ``DedupInputOptimizer``, and assembled by a
    ``CGOExecutionEngine`` configured with a single remote machine exposing
    GPU indices 0 and 1. The assembled result must contain exactly two
    entries.

    The advisor workers and the engine are stopped and joined in a
    ``finally`` clause so a failing assertion does not leave threads running.
    """
    _reset()

    lp, _ = self._build_logical_with_mnist(3)
    opt = DedupInputOptimizer()
    opt.convert(lp)

    advisor = RetiariiAdvisor('ws://_unittest_placeholder_')
    advisor._channel = protocol.LegacyCommandChannel()
    advisor.default_worker.start()
    advisor.assessor_worker.start()

    cgo = None
    try:
        remote = RemoteConfig(machine_list=[])
        remote.machine_list.append(
            RemoteMachineConfig(host='test', gpu_indices=[0, 1]))
        cgo = CGOExecutionEngine(training_service=remote,
                                 batch_waiting_time=0)

        phy_models = cgo._assemble(lp)
        # assertEqual reports the actual length when the check fails.
        self.assertEqual(len(phy_models), 2)
    finally:
        # Always shut the background workers down, including on failure.
        advisor.stopping = True
        advisor.default_worker.join()
        advisor.assessor_worker.join()
        if cgo is not None:
            cgo.join()