def testShuffleFailoverAfterAllSuccFinish(self):
    """Kill a worker after the 'all succ finish' state file appears.

    Submits a shuffle-based reshape followed by ``inner`` through a session
    actor, waits for the state file registered under
    ``SHUFFLE_ALL_SUCC_FINISH_FILE`` to show up, kills the first worker
    process and then checks that the graph still terminates in
    ``GraphState.SUCCEEDED`` with the numerically expected result.

    NOTE(review): the state file is presumably written by the
    ``op_delayer`` module loaded into the processes -- confirm there.
    """
    state_file = self.add_state_file('SHUFFLE_ALL_SUCC_FINISH_FILE')

    self.start_processes(
        modules=['mars.scheduler.tests.integrated.op_delayer'],
        log_worker=True,
    )

    session_id = uuid.uuid1()
    actor_client = new_client()
    session_ref = actor_client.actor_ref(
        self.session_manager_ref.create_session(session_id)
    )

    # Build the computation: reshape forced through shuffle, then inner.
    source = mt.ones((31, 27), chunk_size=10)
    reshaped = source.reshape(27, 31)
    reshaped.op.extra_params['_reshape_with_shuffle'] = True
    inner_product = mt.inner(reshaped + 1, reshaped + 1)

    graph = inner_product.build_graph()
    target_keys = [inner_product.key]
    graph_key = uuid.uuid1()
    session_ref.submit_tileable_graph(
        json.dumps(graph.to_json()), graph_key, target_tileables=target_keys
    )
    actor_client.sleep(1)

    # Poll until the state file exists before injecting the failure.
    while True:
        if os.path.exists(state_file):
            break
        actor_client.sleep(0.01)

    # Take down the first worker and drop it from the tracked list.
    victim = self.proc_workers[0]
    self.kill_process_tree(victim)
    logger.warning('Worker %s KILLED!\n\n', victim.pid)
    self.proc_workers = self.proc_workers[1:]
    os.unlink(state_file)

    final_state = self.wait_for_termination(actor_client, session_ref, graph_key)
    self.assertEqual(final_state, GraphState.SUCCEEDED)

    fetched = session_ref.fetch_result(graph_key, inner_product.key)
    expected_operand = np.ones((27, 31)) + 1
    assert_allclose(loads(fetched), np.inner(expected_operand, expected_operand))
def testShuffleFailoverBeforeAllSuccFinish(self):
    """Kill a worker once the 'has succ finish' state file appears.

    Submits a shuffle-based reshape followed by ``inner``, waits for the
    state file registered under ``SHUFFLE_HAS_SUCC_FINISH_FILE`` to show
    up, kills the first worker process, removes both state files and then
    checks that the submitted computation still yields the numerically
    expected result within ``self.timeout``.

    NOTE(review): the state files are presumably written by the
    ``op_delayer`` module loaded into the processes -- confirm there.
    """
    pred_state_file = self.add_state_file('SHUFFLE_ALL_PRED_FINISHED_FILE')
    succ_state_file = self.add_state_file('SHUFFLE_HAS_SUCC_FINISH_FILE')

    self.start_processes(
        modules=['mars.scheduler.tests.integrated.op_delayer'],
        log_worker=True,
    )

    # Build the computation: reshape forced through shuffle, then inner.
    source = mt.ones((31, 27), chunk_size=10)
    reshaped = source.reshape(27, 31)
    reshaped.op.extra_params['_reshape_with_shuffle'] = True
    inner_product = mt.inner(reshaped + 1, reshaped + 1)

    pending = self._submit_tileable(inner_product)
    time.sleep(1)

    # Poll until the successor state file exists before injecting the failure.
    while True:
        if os.path.exists(succ_state_file):
            break
        time.sleep(0.01)

    # Take down the first worker and drop it from the tracked list.
    victim = self.proc_workers[0]
    self.kill_process_tree(victim)
    logger.warning('Worker %s KILLED!\n\n', victim.pid)
    self.proc_workers = self.proc_workers[1:]

    # Remove both state files, predecessor first, as the original did.
    for state_file in (pred_state_file, succ_state_file):
        os.unlink(state_file)

    computed = pending.result(timeout=self.timeout)
    expected_operand = np.ones((27, 31)) + 1
    assert_allclose(computed, np.inner(expected_operand, expected_operand))