def hash(cls, parsed_args):
    """
    Compute the hash for a job from its parsed constructor arguments.

    Arguments listed in ``cls.__sis_hash_exclude__`` are dropped from the
    hash input, but only while they still carry the excluded value; an
    overridden value is hashed as usual.

    :param dict[str] parsed_args:
    :return: hash for job given the arguments
    :rtype: str
    """
    relevant_args = {
        key: value
        for key, value in parsed_args.items()
        if key not in cls.__sis_hash_exclude__ or cls.__sis_hash_exclude__[key] != value
    }
    # Mixing in __sis_version__ (when set) invalidates hashes of older versions.
    if cls.__sis_version__ is None:
        return tools.sis_hash(relevant_args)
    return tools.sis_hash((relevant_args, cls.__sis_version__))
def test_f(self):
    """
    Check sis_hash_helper() against golden byte representations for many
    object types, and check that sis_hash() yields the matching md5 digest.
    """
    from recipe.task import test
    t = test.Test('foo')
    Point = collections.namedtuple('Point', ['x', 'y'])
    # Each entry is (object, expected sis_hash_helper() bytes, expected sis_hash() digest).
    # A digest of None means: expected digest is simply md5 of the byte representation.
    for obj, ref, hash_ref in [
        (0, b'(int, 0)', '32c41e3ec33997dc8f7aa39d8c00317b'),
        ('0', b"(str, '0')", None),
        (b'0', b"(bytes, 0)", None),
        (b'\x00', b'(bytes, \x00)', None),
        ((8 + 6j), b"(complex, (8+6j))", None),
        (None, b"(NoneType)", None),
        ([1, 2, -1], b"(list, (int, 1), (int, 2), (int, -1))", None),
        ((1, 2, -1), b"(tuple, (int, 1), (int, 2), (int, -1))", None),
        # sets/frozensets/dicts are hashed in sorted order so the result is deterministic
        ({1, 2, -1}, b"(set, (int, -1), (int, 1), (int, 2))", None),
        (frozenset({1, 2, -1}), b"(frozenset, (int, -1), (int, 1), (int, 2))", None),
        ({
            'foo': 1,
            'bar': -1
        }, b"(dict, (tuple, (str, 'bar'), (int, -1)), (tuple, (str, 'foo'), (int, 1)))", None),
        (TestClass(1, 2),
         b"(TestClass, (dict, (tuple, (str, 'a'), (int, 1)), (tuple, (str, 'b'), (int, 2))))", None),
        (Point(3, 5), b'(Point, (tuple, (tuple, (int, 3), (int, 5)), (NoneType)))', None),
        # Jobs are represented by their sis id instead of their attributes
        (test.Test('foo'), b"task/test/Test.7be358a10ed713206e44d0ab965e8612", None),
        (job_path.Path('foo/bar'), b"(Path, (tuple, (NoneType), (str, 'foo/bar')))", None),
        # byte strings up to 4087 bytes are embedded verbatim ...
        (b'0' * 4087, b"(bytes, " + b'0' * 4087 + b")", None),
        # ... longer ones are replaced by a fixed-size digest
        (b'0' * 4088,
         b't\xe0\xf8\xbb\xfd\xe6\xfaN\xa6\xac`\x7f\xd3\xfeZ\xa3c6z\xe8\xc7\x869^\xa1\x011\x8e\xfcx\xa1V', None),
        # mixed dict: jobs and Paths appear both as keys and as values
        ({
            TestClass(1, 2): 999,
            test.Test('foo'): 777,
            'foo': test.Test('bar'),
            'bar': job_path.Path('foo/bar'),
            job_path.Path('foo/bar'): 'bar'
        },
         b"(dict, (tuple, (Path, (tuple, (NoneType), (str, 'foo/bar'))), (str, 'bar')), "
         b"(tuple, (TestClass, (dict, (tuple, (str, 'a'), (int, 1)), (tuple, (str, 'b'), "
         b"(int, 2)))), (int, 999)), (tuple, (str, 'bar'), (Path, (tuple, (NoneType), "
         b"(str, 'foo/bar')))), (tuple, (str, 'foo'), task/test/Test.84bbb5730368c68c8151b56c3ede6c5e), "
         b"(tuple, task/test/Test.7be358a10ed713206e44d0ab965e8612, (int, 777)))", None),
    ]:
        res = sis_hash_helper(obj)
        self.assertEqual(res, ref)
        if hash_ref is None:
            hash_ref = hashlib.md5(ref).hexdigest()
        hash_res = sis_hash(obj)
        self.assertEqual(hash_res, hash_ref)
def submit(self, task):
    """
    Prepares all relevant commands and calls submit_call of the subclass
    to actually pass the job to the relevant engine.

    :param sisyphus.task.Task task: Task to submit
    :return: None
    """
    call = gs.SIS_COMMAND + [gs.CMD_WORKER, os.path.relpath(task.path()), task.name()]
    logpath = os.path.relpath(task.path(gs.JOB_LOG_ENGINE))

    # Only task ids that still have to run are submitted
    submit_states = [gs.STATE_RUNNABLE, gs.STATE_INTERRUPTED]
    pending_ids = [tid for tid in task.task_ids() if task.state(self, tid, True) in submit_states]

    # Group task ids by their (hashed) resource requirements so each group
    # can be handed to the engine as one array submission
    grouped_by_rqmt = {}
    for tid in pending_ids:
        rqmt = self.get_rqmt(task, tid)
        group_key = tools.sis_hash(rqmt)
        stored_rqmt, ids = grouped_by_rqmt.setdefault(group_key, (rqmt, set()))
        assert (tid not in ids)
        assert (rqmt == stored_rqmt)
        ids.add(tid)

    # Submit each requirement group and record what was handed to the engine
    submit_log = os.path.relpath(task.path(gs.ENGINE_SUBMIT))
    for _, (rqmt, ids) in grouped_by_rqmt.items():
        ids = sorted(ids)
        logging.info('Submit to queue: %s %s %s' % (str(task.path()), task.name(), str(ids)))
        engine_name, engine_info = self.submit_call(call, logpath, rqmt, task.task_name(), task.name(), ids)
        logging.debug('Command: (%s) Tasks ids: (%s)' % (' '.join(call), ' '.join([str(i) for i in ids])))
        logging.debug('Requirements: %s' % (str(rqmt)))

        submit_info = rqmt.copy()
        submit_info['engine_info'] = engine_info
        submit_info['engine_name'] = engine_name
        with open(submit_log, 'a') as submit_file:
            submit_file.write('%s\n' % str((ids, submit_info)))

    task.reset_cache()
def hash_limited_len_name(name, limit=200):
    """
    Return *name* unchanged when it fits, otherwise truncate it and append a
    hash of the full name so the result is exactly ``limit`` bytes long.

    :param str name: name to shorten if needed
    :param int limit: maximum allowed length in bytes
    :return: name, maybe truncated (by hash) such that its len (in bytes) is <= limit
    :rtype: str
    """
    name_b = name.encode("utf8")
    # Fix: use <= here. A name of exactly `limit` bytes already satisfies the
    # documented contract and must not be truncated (was `< limit`, an
    # off-by-one that hashed names of exactly the limit length).
    # NOTE(review): this changes the output only for names of exactly `limit`
    # bytes; both old and new results respect the length bound.
    if len(name_b) <= limit:
        return name
    assert len(name_b) == len(name)  # ascii expected currently...
    h = sis_hash(name_b)
    # keep a prefix of the original, then "..." plus the hash; the pieces add
    # up to exactly `limit` characters
    name = "%s...%s" % (name[:limit - 3 - len(h)], h)
    assert len(name) == limit
    return name
def visualize_block(block, engine, vis_url_prefix):
    """Creates a dot representation of a Block

    :param Block block: block whose children (sub-blocks and jobs) are drawn
    :param engine: engine used to query job/block states (node colors)
    :param str vis_url_prefix: url prefix for links to sub-block views
    :return: (True, dot_source) on success, or (False, error_message) when the
             graph would exceed gs.VIS_MAX_NODES_PER_VIEW nodes
    :rtype: (bool, str)
    """
    jobs = set()
    inputs = set()
    input_to_node = dict()
    links = set()
    counts = collections.Counter()

    result = []
    result.append('digraph G {\n')
    result.append('rankdir = TB;\n')

    # output jobs and blocks and gather info on inputs and links
    for idx, child in enumerate(block.filtered_children()):
        if isinstance(child, Block):
            result.append(
                dot_node(child.name, color_map[block_state(child, engine)],
                         'box3d', vis_url_prefix + '.%d' % idx))
            bj = block_jobs(child)
            bei = block_external_inputs(child, bj)
            inputs.update(bei)
            counts.update(bei)
            links.update((i, child.name) for i in bei)
            for j in bj.values():
                input_to_node.update(
                    (o.rel_path(), child.name) for o in j._sis_outputs.values())
        elif isinstance(child, Job):
            job_name = job_id = child._sis_id()
            # prefer an alias / explicit vis name over the raw sis id as label
            job_name = child.get_one_alias() if child.get_one_alias() is not None else job_name
            job_name = child.get_vis_name() if child.get_vis_name() is not None else job_name
            if job_id in jobs:
                continue
            jobs.add(job_id)
            result.append(
                dot_node(job_id, color_map[child._sis_state(engine)],
                         'folder', '/info/' + job_id, job_name))
            inputs.update(i.rel_path() for i in child._sis_inputs)
            counts.update(i.rel_path() for i in child._sis_inputs)
            links.update((i.rel_path(), job_id) for i in child._sis_inputs)
            input_to_node.update(
                (o.rel_path(), job_id) for o in child._sis_outputs.values())

    # merge inputs that share the same creator and the same set of consumers
    # into one node to keep the graph readable
    merge_inputs_mapping = dict()
    merged_labels = dict()
    merged_creators = dict()
    for i in inputs:
        creator = input_to_node.get(i, '')
        users = sorted(t[1] for t in filter(lambda l: l[0] == i, links))
        # fix: renamed local from `hash` to avoid shadowing the builtin
        merge_key = sis_hash((creator, users))
        merge_inputs_mapping[i] = merge_key
        if merge_key not in merged_labels:
            merged_labels[merge_key] = [i.split('/')[-1]]
        else:
            merged_labels[merge_key].append(i.split('/')[-1])
        if len(creator) > 0:
            merged_creators[merge_key] = creator

    if len(merged_labels) > gs.VIS_MAX_NODES_PER_VIEW:
        return False, 'Failed to create visual representation. The graph contains more than %i nodes which exceeds ' \
                      'the limit of %i (VIS_MAX_NODES_PER_VIEW)' % (len(merged_labels), gs.VIS_MAX_NODES_PER_VIEW)

    merged_links = set((merge_inputs_mapping[l[0]], l[1]) for l in links)
    merged_counts = collections.Counter(
        dict((merge_inputs_mapping[k], v) for k, v in counts.items()))

    # output inputs and the links from the creators to the inputs
    for h, l in merged_labels.items():
        result.append(
            dot_node(h, 'aquamarine', 'box', '', '\\n'.join(compact_inputs(l))))
        if h in merged_creators:
            result.append('"%s" -> "%s";\n' % (merged_creators[h], h))

    # output input-links; inputs used by too many nodes are bundled into one
    # "[Common Inputs]" node instead of drawing an edge per consumer
    common_inputs = set()
    for l in merged_links:
        if merged_counts[l[0]] <= gs.VIS_RELATIVE_MERGE_THRESHOLD * len(block.filtered_children()) or \
           merged_counts[l[0]] <= gs.VIS_ABSOLUTE_MERGE_THRESHOLD:
            result.append('"%s" -> "%s";\n' % l)
        else:
            common_inputs.add(l[0])

    # output common-inputs
    if len(common_inputs) > 0:
        result.append(dot_node('[Common Inputs]', 'white', 'box', ''))
        for ci in common_inputs:
            # fix: added the trailing '\n' for consistency with every other
            # emitted dot statement (dot itself accepts both forms)
            result.append('"%s" -> "[Common Inputs]";\n' % ci)

    result.append('}\n')
    return True, ''.join(result)