def test_multi_output(setup):
    """Word-count via a remote mapper that splits its output into two shards.

    The mapper is spawned with ``n_output=2`` so each call yields two
    tileables; words are routed to a shard by ``mmh3_hash(word) % 2``.
    Each shard list is then reduced independently and the merged result
    must equal the plain word count of all sentences.
    """
    sentences = ['word1 word2', 'word2 word3', 'word3 word2 word1']

    def mapper(s):
        # defaultdict(int) rather than defaultdict(lambda: 0): same zero
        # default, idiomatic, and picklable with the stdlib pickle (a
        # lambda default_factory is not) — these dicts cross the
        # remote-execution boundary.
        word_to_count = defaultdict(int)
        for word in s.split():
            word_to_count[word] += 1
        # Two output shards; a word always lands in the shard chosen by
        # its hash, so the same word never appears in both reducers.
        downsides = [defaultdict(int), defaultdict(int)]
        for word, count in word_to_count.items():
            downsides[mmh3_hash(word) % 2][word] += count
        return downsides

    def reducer(word_to_count_list):
        # Sum the per-sentence partial counts for one shard.
        d = defaultdict(int)
        for word_to_count in word_to_count_list:
            for word, count in word_to_count.items():
                d[word] += count
        return dict(d)

    # Collect shard-0 and shard-1 outputs of every sentence separately.
    outs = [], []
    for sentence in sentences:
        out1, out2 = spawn(mapper, sentence, n_output=2)
        outs[0].append(out1)
        outs[1].append(out2)

    # One reducer per shard.
    rs = [spawn(reducer, out) for out in outs]

    # Execute both reducers together and merge their (disjoint) results.
    result = {}
    for wc in ExecutableTuple(rs).to_object():
        result.update(wc)
    assert result == {'word1': 2, 'word2': 3, 'word3': 2}
def testFetchLogWithoutEtcd(self):
    """Integration test: fetch remote-function logs on an etcd-less cluster."""
    # All log files live under a temp dir passed to the scheduler via
    # -Dcustom_log_dir, so they are cleaned up when the test exits.
    with tempfile.TemporaryDirectory() as temp_dir:
        self.start_processes(
            etcd=False,
            modules=['mars.scheduler.tests.integrated.no_prepare_op'],
            scheduler_args=[f'-Dcustom_log_dir={temp_dir}'])
        sess = new_session(self.session_manager_ref.address)

        # --- single function: verify the log through three channels ---
        def f():
            print('test')

        r = spawn(f)
        r.execute(session=sess)

        # Channel 1: read the log file directly via the log-meta actor.
        scheduler_addr = self.cluster_info.get_scheduler(
            CustomLogMetaActor.default_uid())
        custom_log_actor = sess._api.actor_client.actor_ref(
            CustomLogMetaActor.default_uid(), address=scheduler_addr)
        chunk_key_to_log_path = custom_log_actor.get_tileable_op_log_paths(
            sess.session_id, r.op.key)
        paths = list(chunk_key_to_log_path.values())
        self.assertEqual(len(paths), 1)
        log_path = paths[0][1]
        with open(log_path) as f:
            self.assertEqual(f.read().strip(), 'test')

        # Channel 2: fetch through a DistributedContext.
        context = DistributedContext(
            scheduler_address=self.session_manager_ref.address,
            session_id=sess.session_id)
        log_result = context.fetch_tileable_op_logs(r.op.key)
        log = next(iter(log_result.values()))['log']
        self.assertEqual(log.strip(), 'test')

        # Channel 3: the tileable's own fetch_log().
        log = r.fetch_log()
        self.assertEqual(str(log).strip(), 'test')

        # --- multiple functions executed as one ExecutableTuple ---
        def f1(size):
            print('f1' * size)
            sys.stdout.flush()

        fs = ExecutableTuple([spawn(f1, 30), spawn(f1, 40)])
        fs.execute(session=sess)

        # Positive offsets: slice [20:30] of each function's output.
        log = fs.fetch_log(offsets=20, sizes=10)
        self.assertEqual(str(log[0]).strip(), ('f1' * 30)[20:30])
        self.assertEqual(str(log[1]).strip(), ('f1' * 40)[20:30])
        self.assertGreater(len(log[0].offsets), 0)
        self.assertTrue(all(s > 0 for s in log[0].offsets))
        self.assertGreater(len(log[1].offsets), 0)
        self.assertTrue(all(s > 0 for s in log[1].offsets))
        self.assertGreater(len(log[0].chunk_op_keys), 0)

        # Negative offsets count from the end (print appends '\n').
        log = fs.fetch_log(offsets=-20, sizes=10)
        self.assertEqual(str(log[0]).strip(), ('f1' * 30 + '\n')[-20:-10])
        self.assertEqual(str(log[1]).strip(), ('f1' * 40 + '\n')[-20:-10])
        self.assertTrue(all(s > 0 for s in log[0].offsets))
        self.assertGreater(len(log[1].offsets), 0)
        self.assertTrue(all(s > 0 for s in log[1].offsets))
        self.assertGreater(len(log[0].chunk_op_keys), 0)

        # Negative offsets given as human-readable size strings.
        log = fs.fetch_log(offsets='-0.02K', sizes='0.01K')
        self.assertEqual(str(log[0]).strip(), ('f1' * 30 + '\n')[-20:-10])
        self.assertEqual(str(log[1]).strip(), ('f1' * 40 + '\n')[-20:-10])
        self.assertTrue(all(s > 0 for s in log[0].offsets))
        self.assertGreater(len(log[1].offsets), 0)
        self.assertTrue(all(s > 0 for s in log[1].offsets))
        self.assertGreater(len(log[0].chunk_op_keys), 0)

        # --- nested spawn: inner fetch_log output is captured too ---
        def test_nested():
            print('level0')
            fr = spawn(f1, 1)
            fr.execute()
            print(fr.fetch_log())

        r = spawn(test_nested)
        # Fetching before execution must fail.
        with self.assertRaises(ValueError):
            r.fetch_log()
        r.execute(session=sess)
        log = str(r.fetch_log())
        self.assertIn('level0', log)
        self.assertIn('f1', log)

        # --- map_chunk on a DataFrame produces per-chunk logs ---
        df = md.DataFrame(mt.random.rand(10, 3), chunk_size=5)

        def df_func(c):
            print('df func')
            return c

        df2 = df.map_chunk(df_func)
        df2.execute(session=sess)
        log = df2.fetch_log()
        self.assertIn('Chunk op key:', str(log))
        self.assertIn('df func', repr(log))
        # The source DataFrame itself printed nothing.
        self.assertEqual(len(str(df.fetch_log(session=sess))), 0)
def testFetchLogWithoutEtcd(self):
    """Integration test: fetch remote-function logs on an etcd-less cluster,
    including nested spawns, host-spawned functions and threaded printing."""
    # Logs are written below a temp dir handed to the scheduler through
    # the -Dcustom_log_dir option.
    with tempfile.TemporaryDirectory() as temp_dir:
        self.start_processes(
            etcd=False,
            modules=['mars.scheduler.tests.integrated.no_prepare_op'],
            scheduler_args=[f'-Dcustom_log_dir={temp_dir}'])
        sess = new_session(self.session_manager_ref.address)

        # --- single function: verify the log through three channels ---
        def f():
            print('test')

        r = spawn(f)
        r.execute(session=sess)

        # Channel 1: locate and read the raw log file via the meta actor.
        custom_log_actor = sess._api.actor_client.actor_ref(
            CustomLogMetaActor.default_uid(),
            address=self.cluster_info.get_scheduler(
                CustomLogMetaActor.default_uid()))
        chunk_key_to_log_path = custom_log_actor.get_tileable_op_log_paths(
            sess.session_id, r.op.key)
        paths = list(chunk_key_to_log_path.values())
        self.assertEqual(len(paths), 1)
        log_path = paths[0][1]
        with open(log_path) as f:
            self.assertEqual(f.read().strip(), 'test')

        # Channel 2: fetch through a DistributedContext.
        context = DistributedContext(
            scheduler_address=self.session_manager_ref.address,
            session_id=sess.session_id)
        log_result = context.fetch_tileable_op_logs(r.op.key)
        log = next(iter(log_result.values()))['log']
        self.assertEqual(log.strip(), 'test')

        # Channel 3: the tileable's own fetch_log().
        log = r.fetch_log()
        self.assertEqual(str(log).strip(), 'test')

        # --- multiple functions executed as one ExecutableTuple ---
        def f1(size):
            print('f1' * size)
            sys.stdout.flush()

        fs = ExecutableTuple([spawn(f1, 30), spawn(f1, 40)])
        fs.execute(session=sess)

        # Positive offsets: slice [20:30] of each function's output.
        log = fs.fetch_log(offsets=20, sizes=10)
        self.assertEqual(str(log[0]).strip(), ('f1' * 30)[20:30])
        self.assertEqual(str(log[1]).strip(), ('f1' * 40)[20:30])
        self.assertGreater(len(log[0].offsets), 0)
        self.assertTrue(all(s > 0 for s in log[0].offsets))
        self.assertGreater(len(log[1].offsets), 0)
        self.assertTrue(all(s > 0 for s in log[1].offsets))
        self.assertGreater(len(log[0].chunk_op_keys), 0)

        # Negative offsets count from the end (print appends '\n').
        log = fs.fetch_log(offsets=-20, sizes=10)
        self.assertEqual(str(log[0]).strip(), ('f1' * 30 + '\n')[-20:-10])
        self.assertEqual(str(log[1]).strip(), ('f1' * 40 + '\n')[-20:-10])
        self.assertTrue(all(s > 0 for s in log[0].offsets))
        self.assertGreater(len(log[1].offsets), 0)
        self.assertTrue(all(s > 0 for s in log[1].offsets))
        self.assertGreater(len(log[0].chunk_op_keys), 0)

        # Negative offsets given as human-readable size strings.
        log = fs.fetch_log(offsets='-0.02K', sizes='0.01K')
        self.assertEqual(str(log[0]).strip(), ('f1' * 30 + '\n')[-20:-10])
        self.assertEqual(str(log[1]).strip(), ('f1' * 40 + '\n')[-20:-10])
        self.assertTrue(all(s > 0 for s in log[0].offsets))
        self.assertGreater(len(log[1].offsets), 0)
        self.assertTrue(all(s > 0 for s in log[1].offsets))
        self.assertGreater(len(log[0].chunk_op_keys), 0)

        # --- nested spawn: inner fetch_log output is captured too ---
        def test_nested():
            print('level0')
            fr = spawn(f1, 1)
            fr.execute()
            print(fr.fetch_log())

        r = spawn(test_nested)
        # Fetching before execution must fail.
        with self.assertRaises(ValueError):
            r.fetch_log()
        r.execute(session=sess)
        log = str(r.fetch_log())
        self.assertIn('level0', log)
        self.assertIn('f1', log)

        # --- map_chunk on a DataFrame produces per-chunk logs ---
        df = md.DataFrame(mt.random.rand(10, 3), chunk_size=5)

        def df_func(c):
            print('df func')
            return c

        df2 = df.map_chunk(df_func)
        df2.execute(session=sess)
        log = df2.fetch_log()
        self.assertIn('Chunk op key:', str(log))
        self.assertIn('df func', repr(log))
        # The source DataFrame itself printed nothing.
        self.assertEqual(len(str(df.fetch_log(session=sess))), 0)

        # --- host function that itself spawns and fetches a nested log ---
        def test_host(rndf):
            rm = spawn(nested, rndf)
            rm.execute()
            print(rm.fetch_log())

        def nested(_rndf):
            print('log_content')

        ds = [spawn(test_host, n, retry_when_fail=False)
              for n in np.random.rand(4)]
        xtp = ExecutableTuple(ds)
        xtp.execute(session=sess)
        for log in xtp.fetch_log(session=sess):
            self.assertEqual(str(log).strip(), 'log_content')

        # --- printing from a worker thread is also captured ---
        def test_threaded():
            import threading

            exc_info = None

            def print_fun():
                nonlocal exc_info
                try:
                    print('inner')
                except:  # noqa: E722  # nosec  # pylint: disable=bare-except
                    exc_info = sys.exc_info()

            print_thread = threading.Thread(target=print_fun)
            print_thread.start()
            print_thread.join()

            # Re-raise any failure observed inside the thread.
            if exc_info is not None:
                raise exc_info[1].with_traceback(exc_info[-1])

            print('after')

        rm = spawn(test_threaded)
        rm.execute(session=sess)
        logs = str(rm.fetch_log(session=sess)).strip()
        self.assertEqual(logs, 'inner\nafter')