Esempio n. 1
0
    def testShuffleFailoverAfterAllSuccFinish(self):
        """Kill a worker once the all-successors-finished marker exists.

        Submits ``inner(b + 1, b + 1)`` where ``b`` is a reshape flagged with
        ``_reshape_with_shuffle``, waits until the state file registered as
        ``SHUFFLE_ALL_SUCC_FINISH_FILE`` exists, kills the first worker
        process, removes the state file, and then expects the graph to end in
        ``GraphState.SUCCEEDED`` with the values NumPy computes for the same
        expression.
        """
        # NOTE(review): the marker file is presumably written by the
        # op_delayer module loaded below -- confirm against that module.
        marker_path = self.add_state_file('SHUFFLE_ALL_SUCC_FINISH_FILE')

        self.start_processes(
            modules=['mars.scheduler.tests.integrated.op_delayer'],
            log_worker=True,
        )

        session_id = uuid.uuid1()
        client = new_client()
        session_handle = self.session_manager_ref.create_session(session_id)
        session_ref = client.actor_ref(session_handle)

        # Build the tileable: a shuffle-based reshape feeding an inner product.
        ones_tensor = mt.ones((31, 27), chunk_size=10)
        reshaped = ones_tensor.reshape(27, 31)
        reshaped.op.extra_params['_reshape_with_shuffle'] = True
        # Two separate ``reshaped + 1`` expressions, exactly as many as the
        # inner product takes operands.
        lhs = reshaped + 1
        rhs = reshaped + 1
        product = mt.inner(lhs, rhs)

        tileable_graph = product.build_graph()
        target_keys = [product.key]
        graph_key = uuid.uuid1()
        serialized_graph = json.dumps(tileable_graph.to_json())
        session_ref.submit_tileable_graph(
            serialized_graph, graph_key, target_tileables=target_keys)
        client.sleep(1)

        # Poll until the marker file shows up on disk.
        while True:
            if os.path.exists(marker_path):
                break
            client.sleep(0.01)

        # Take down the first worker and stop tracking it.
        victim = self.proc_workers[0]
        self.kill_process_tree(victim)
        logger.warning('Worker %s KILLED!\n\n', victim.pid)
        self.proc_workers = self.proc_workers[1:]

        os.unlink(marker_path)

        final_state = self.wait_for_termination(client, session_ref, graph_key)
        self.assertEqual(final_state, GraphState.SUCCEEDED)

        raw_result = session_ref.fetch_result(graph_key, product.key)
        actual = loads(raw_result)
        expected_operand = np.ones((27, 31)) + 1
        assert_allclose(actual, np.inner(expected_operand, expected_operand))
Esempio n. 2
0
    def testShuffleFailoverBeforeAllSuccFinish(self):
        """Kill a worker as soon as a has-successor-finished marker exists.

        Submits ``inner(b + 1, b + 1)`` where ``b`` is a reshape flagged with
        ``_reshape_with_shuffle``, waits until the state file registered as
        ``SHUFFLE_HAS_SUCC_FINISH_FILE`` exists, kills the first worker
        process, removes both state files, and then checks the computed result
        against the values NumPy produces for the same expression.
        """
        # NOTE(review): both marker files are presumably written by the
        # op_delayer module loaded below -- confirm against that module.
        pred_marker = self.add_state_file('SHUFFLE_ALL_PRED_FINISHED_FILE')
        succ_marker = self.add_state_file('SHUFFLE_HAS_SUCC_FINISH_FILE')

        self.start_processes(
            modules=['mars.scheduler.tests.integrated.op_delayer'],
            log_worker=True,
        )

        # Build the tileable: a shuffle-based reshape feeding an inner product.
        ones_tensor = mt.ones((31, 27), chunk_size=10)
        reshaped = ones_tensor.reshape(27, 31)
        reshaped.op.extra_params['_reshape_with_shuffle'] = True
        # Two separate ``reshaped + 1`` expressions, exactly as many as the
        # inner product takes operands.
        lhs = reshaped + 1
        rhs = reshaped + 1
        product = mt.inner(lhs, rhs)

        pending = self._submit_tileable(product)
        time.sleep(1)

        # Poll until the successor marker file shows up on disk.
        while True:
            if os.path.exists(succ_marker):
                break
            time.sleep(0.01)

        # Take down the first worker and stop tracking it.
        victim = self.proc_workers[0]
        self.kill_process_tree(victim)
        logger.warning('Worker %s KILLED!\n\n', victim.pid)
        self.proc_workers = self.proc_workers[1:]

        # Remove the markers in the same order as they were registered.
        for marker in (pred_marker, succ_marker):
            os.unlink(marker)

        actual = pending.result(timeout=self.timeout)
        expected_operand = np.ones((27, 31)) + 1
        assert_allclose(actual, np.inner(expected_operand, expected_operand))