Example #1
0
 def hash(cls, parsed_args):
     """
     Compute the hash of a job from its parsed arguments.

     An argument is left out of the hashed data when it is listed in
     ``cls.__sis_hash_exclude__`` with exactly the value it currently has.
     If ``cls.__sis_version__`` is not None it is hashed together with the
     remaining arguments.

     :param dict[str] parsed_args:
     :return: hash for job given the arguments
     :rtype: str
     """
     excluded = cls.__sis_hash_exclude__
     hashed_args = {
         key: value
         for key, value in parsed_args.items()
         if key not in excluded or excluded[key] != value
     }
     version = cls.__sis_version__
     if version is not None:
         return tools.sis_hash((hashed_args, version))
     return tools.sis_hash(hashed_args)
Example #2
0
 def test_f(self):
     # Table-driven check of the two hashing entry points: for every case,
     # sis_hash_helper(obj) must equal the expected byte serialization `ref`
     # and sis_hash(obj) must equal `hash_ref`. When `hash_ref` is None the
     # expected hash is the md5 hex digest of `ref`.
     from recipe.task import test
     # NOTE(review): `t` is not used below; presumably kept because creating
     # the job has side effects -- confirm before removing.
     t = test.Test('foo')
     Point = collections.namedtuple('Point', ['x', 'y'])
     # Each entry is (object to hash, expected serialization, expected hash or None).
     for obj, ref, hash_ref in [
         # scalars and simple builtins
         (0, b'(int, 0)', '32c41e3ec33997dc8f7aa39d8c00317b'),
         ('0', b"(str, '0')", None),
         (b'0', b"(bytes, 0)", None),
         (b'\x00', b'(bytes, \x00)', None),
         ((8 + 6j), b"(complex, (8+6j))", None),
         (None, b"(NoneType)", None),
         # containers; the expected set/frozenset/dict output lists its
         # members in sorted order
         ([1, 2, -1], b"(list, (int, 1), (int, 2), (int, -1))", None),
         ((1, 2, -1), b"(tuple, (int, 1), (int, 2), (int, -1))", None),
         ({1, 2, -1}, b"(set, (int, -1), (int, 1), (int, 2))", None),
         (frozenset({1, 2, -1}),
          b"(frozenset, (int, -1), (int, 1), (int, 2))", None),
         ({
             'foo': 1,
             'bar': -1
         },
          b"(dict, (tuple, (str, 'bar'), (int, -1)), (tuple, (str, 'foo'), (int, 1)))",
          None),
         # custom objects, namedtuples, jobs and paths
         (TestClass(1, 2),
          b"(TestClass, (dict, (tuple, (str, 'a'), (int, 1)), (tuple, (str, 'b'), (int, 2))))",
          None),
         (Point(3, 5),
          b'(Point, (tuple, (tuple, (int, 3), (int, 5)), (NoneType)))',
          None),
         (test.Test('foo'),
          b"task/test/Test.7be358a10ed713206e44d0ab965e8612", None),
         (job_path.Path('foo/bar'),
          b"(Path, (tuple, (NoneType), (str, 'foo/bar')))", None),
         # Length boundary: the readable form of the first case is 4096 bytes
         # long and is expected verbatim; one more input byte and the expected
         # value is 32 raw bytes instead (presumably a digest of the long
         # serialization -- confirm in sis_hash_helper).
         (b'0' * 4087, b"(bytes, " + b'0' * 4087 + b")", None),
         (b'0' * 4088,
          b't\xe0\xf8\xbb\xfd\xe6\xfaN\xa6\xac`\x7f\xd3\xfeZ\xa3c6z\xe8\xc7\x869^\xa1\x011\x8e\xfcx\xa1V',
          None),
         # dict mixing all of the above as keys and values
         ({
             TestClass(1, 2): 999,
             test.Test('foo'): 777,
             'foo': test.Test('bar'),
             'bar': job_path.Path('foo/bar'),
             job_path.Path('foo/bar'): 'bar'
         },
          b"(dict, (tuple, (Path, (tuple, (NoneType), (str, 'foo/bar'))), (str, 'bar')), "
          b"(tuple, (TestClass, (dict, (tuple, (str, 'a'), (int, 1)), (tuple, (str, 'b'), "
          b"(int, 2)))), (int, 999)), (tuple, (str, 'bar'), (Path, (tuple, (NoneType), "
          b"(str, 'foo/bar')))), (tuple, (str, 'foo'), task/test/Test.84bbb5730368c68c8151b56c3ede6c5e), "
          b"(tuple, task/test/Test.7be358a10ed713206e44d0ab965e8612, (int, 777)))",
          None),
     ]:
         res = sis_hash_helper(obj)
         self.assertEqual(res, ref)
         # no explicit hash given: expect the md5 hex digest of the serialization
         if hash_ref is None:
             hash_ref = hashlib.md5(ref).hexdigest()
         hash_res = sis_hash(obj)
         self.assertEqual(hash_res, hash_ref)
Example #3
0
    def submit(self, task):
        """ Collects the pending task ids of a task, groups them by their
        requirements and passes every group to submit_call of the subclass

        Only task ids whose state is runnable or interrupted are submitted.
        Every submission is appended as one line to the submit log of the
        task, afterwards the cache of the task is reset.

        :param sisyphus.task.Task task: Task to submit
        :return: None
        """

        worker_call = gs.SIS_COMMAND + [
            gs.CMD_WORKER,
            os.path.relpath(task.path()),
            task.name()
        ]
        engine_log = os.path.relpath(task.path(gs.JOB_LOG_ENGINE))

        submittable_states = [gs.STATE_RUNNABLE, gs.STATE_INTERRUPTED]
        pending_ids = []
        for task_id in task.task_ids():
            if task.state(self, task_id, True) in submittable_states:
                pending_ids.append(task_id)

        # group the pending ids by the hash of their requirements so that
        # ids with equal requirements end up in one submit call
        groups = {}
        for task_id in pending_ids:
            rqmt = self.get_rqmt(task, task_id)
            group_rqmt, group_ids = groups.setdefault(
                tools.sis_hash(rqmt), (rqmt, set()))
            assert (task_id not in group_ids)
            assert (rqmt == group_rqmt)
            group_ids.add(task_id)

        # the actual job submitting part
        submit_log = os.path.relpath(task.path(gs.ENGINE_SUBMIT))
        for rqmt, id_set in groups.values():
            ordered_ids = sorted(id_set)
            logging.info('Submit to queue: %s %s %s' %
                         (str(task.path()), task.name(), str(ordered_ids)))
            engine_name, engine_info = self.submit_call(
                worker_call, engine_log, rqmt, task.task_name(), task.name(),
                ordered_ids)

            command_text = ' '.join(worker_call)
            ids_text = ' '.join([str(i) for i in ordered_ids])
            logging.debug('Command: (%s) Tasks ids: (%s)' %
                          (command_text, ids_text))
            logging.debug('Requirements: %s' % (str(rqmt)))

            # remember what was submitted together with the engine's answer
            submit_info = rqmt.copy()
            submit_info['engine_info'] = engine_info
            submit_info['engine_name'] = engine_name
            with open(submit_log, 'a') as submit_file:
                submit_file.write('%s\n' % str((ordered_ids, submit_info)))

        task.reset_cache()
Example #4
0
def hash_limited_len_name(name, limit=200):
    """
    Shorten a name with a hash suffix when it does not fit the length limit.

    A name whose UTF-8 encoding is shorter than ``limit`` bytes is returned
    unchanged. Any other name is cut and "..." plus the sis_hash of the full
    encoded name is appended, such that the result is exactly ``limit``
    characters long. Names that need shortening are expected to be ASCII.

    :param str name:
    :param int limit:
    :return: name, maybe truncated (by hash) such that its len (in bytes) is <= limit
    :rtype: str
    """
    encoded = name.encode("utf8")
    if len(encoded) >= limit:
        assert len(encoded) == len(name)  # ascii expected currently...
        digest = sis_hash(encoded)
        # leave room for the "..." separator and the digest
        keep = limit - 3 - len(digest)
        shortened = "%s...%s" % (name[:keep], digest)
        assert len(shortened) == limit
        return shortened
    return name
Example #5
0
def visualize_block(block, engine, vis_url_prefix):
    """Creates a dot representation of a Block

    Every filtered child of the block becomes a node: sub-blocks are drawn as
    'box3d' nodes, jobs as 'folder' nodes (each job id only once). Inputs that
    share the same creator and the same set of users are merged into a single
    'box' node. Inputs whose use count exceeds both merge thresholds are not
    linked to their users but to one shared "[Common Inputs]" node.

    :param block: block whose filtered children are visualized
    :param engine: engine handed to the state lookups that pick node colors
    :param str vis_url_prefix: prefix of the URL attached to sub-block nodes,
        the index of the child is appended as ``.<idx>``
    :return: ``(True, dot_source)`` on success, ``(False, error_message)`` if
        the graph has more than ``gs.VIS_MAX_NODES_PER_VIEW`` merged input nodes
    :rtype: tuple[bool, str]
    """
    jobs = set()
    inputs = set()
    input_to_node = dict()
    links = set()
    counts = collections.Counter()

    result = ['digraph G {\n', 'rankdir = TB;\n']

    # output jobs and blocks and gather info on inputs and links
    for idx, child in enumerate(block.filtered_children()):
        if isinstance(child, Block):
            result.append(
                dot_node(child.name, color_map[block_state(child, engine)],
                         'box3d', vis_url_prefix + '.%d' % idx))

            bj = block_jobs(child)
            bei = block_external_inputs(child, bj)

            inputs.update(bei)
            counts.update(bei)
            links.update((i, child.name) for i in bei)

            for j in bj.values():
                input_to_node.update((o.rel_path(), child.name)
                                     for o in j._sis_outputs.values())

        elif isinstance(child, Job):
            # label precedence: vis name, then alias, then the job id
            job_name = job_id = child._sis_id()
            alias = child.get_one_alias()
            if alias is not None:
                job_name = alias
            vis_name = child.get_vis_name()
            if vis_name is not None:
                job_name = vis_name
            if job_id in jobs:
                continue
            jobs.add(job_id)
            result.append(
                dot_node(job_id, color_map[child._sis_state(engine)], 'folder',
                         '/info/' + job_id, job_name))

            inputs.update(i.rel_path() for i in child._sis_inputs)
            counts.update(i.rel_path() for i in child._sis_inputs)
            links.update((i.rel_path(), job_id) for i in child._sis_inputs)
            input_to_node.update(
                (o.rel_path(), job_id) for o in child._sis_outputs.values())

    # group the users of every input once instead of scanning all links
    # again for each input
    users_by_input = collections.defaultdict(list)
    for source, target in links:
        users_by_input[source].append(target)

    merge_inputs_mapping = dict()
    merged_labels = dict()
    merged_creators = dict()

    # inputs with identical creator and identical users share one merged node
    for i in inputs:
        creator = input_to_node.get(i, '')
        users = sorted(users_by_input.get(i, ()))
        merged_id = sis_hash((creator, users))
        merge_inputs_mapping[i] = merged_id
        merged_labels.setdefault(merged_id, []).append(i.split('/')[-1])
        if creator:
            merged_creators[merged_id] = creator

    if len(merged_labels) > gs.VIS_MAX_NODES_PER_VIEW:
        return False, 'Failed to create visual representation. The graph contains more than %i nodes which exceeds ' \
                      'the limit of %i (VIS_MAX_NODES_PER_VIEW)' % (len(merged_labels), gs.VIS_MAX_NODES_PER_VIEW)

    # This assignment must live outside the size check above: nested under the
    # early return it was unreachable and the loop over the links further
    # down failed with an UnboundLocalError.
    merged_links = set((merge_inputs_mapping[link[0]], link[1]) for link in links)
    merged_counts = collections.Counter(
        {merge_inputs_mapping[k]: v for k, v in counts.items()})

    # output inputs and the links from the creators to the inputs
    for merged_id, labels in merged_labels.items():
        result.append(
            dot_node(merged_id, 'aquamarine', 'box', '',
                     '\\n'.join(compact_inputs(labels))))
        if merged_id in merged_creators:
            result.append('"%s" -> "%s";\n' % (merged_creators[merged_id], merged_id))

    # output input-links
    relative_threshold = gs.VIS_RELATIVE_MERGE_THRESHOLD * len(block.filtered_children())
    common_inputs = set()
    for link in merged_links:
        use_count = merged_counts[link[0]]
        if use_count <= relative_threshold or use_count <= gs.VIS_ABSOLUTE_MERGE_THRESHOLD:
            result.append('"%s" -> "%s";\n' % link)
        else:
            # used by too many nodes: attach to the shared node instead
            common_inputs.add(link[0])

    # output common-inputs
    if common_inputs:
        result.append(dot_node('[Common Inputs]', 'white', 'box', ''))
        for ci in common_inputs:
            result.append('"%s" -> "[Common Inputs]";' % ci)

    result.append('}\n')
    return True, ''.join(result)