def test_parse_metadata():
    """parse_metadata() should turn the PO header entry into a header dict."""
    header_text = strip_margin(
        r"""
        |msgid ""
        |msgstr ""
        |"Content-Type: text/plain; charset=UTF-8\n"
        |"Content-Transfer-Encoding: 8bit\n"
        """
    ).strip()
    header_entry = next(load_po(io.StringIO(header_text)))
    expected = {
        "Content-Type": "text/plain; charset=UTF-8",
        "Content-Transfer-Encoding": "8bit",
    }
    assert parse_metadata(header_entry) == expected
def diff_workflow(flow1, flow2):
    """Visualize the differences between two workflows.

    Requires graphviz's frontend "xdot" program to be installed.

    Args:
        flow1, flow2: visualize the differences between these workflows.
    """
    # Union of the task IDs and dependency edges of both workflows:
    all_task_ids = frozenset.union(
        frozenset(flow1.tasks.keys()), frozenset(flow2.tasks.keys()))
    all_deps = frozenset.union(flow1.deps, flow2.deps)

    def node_line(task_id):
        # Blue = only in flow2, red = only in flow1, black = in both.
        if task_id not in flow1.tasks:
            color = 'blue'
        elif task_id not in flow2.tasks:
            color = 'red'
        else:
            color = 'black'
        return ' %s [color="%s"];' % (base.make_ident(task_id), color)

    def dep_line(dep):
        # Same color scheme as node_line, applied to dependency edges.
        if dep not in flow1.deps:
            color = 'blue'
        elif dep not in flow2.deps:
            color = 'red'
        else:
            color = 'black'
        return ' %s -> %s [color="%s"];' % (
            base.make_ident(dep.after), base.make_ident(dep.before), color)

    dot_template = base.strip_margin("""\
        |digraph Workflow {
        |%(nodes)s
        |%(deps)s
        |}""")

    dot_source = dot_template % dict(
        nodes='\n'.join(sorted(map(node_line, all_task_ids))),
        deps='\n'.join(sorted(map(dep_line, all_deps))),
    )

    # Write the Dot source to a temp file and hand it to xdot for display:
    with tempfile.NamedTemporaryFile(prefix='wfdiff.', suffix='.dot') as f:
        f.write(dot_source.encode())
        f.flush()
        os.system('xdot %s' % f.name)
def __call__(self, args, config_file=None):
    """Allows invoking an action as a function.

    Args:
        args: Command-line arguments specific to the action.
        config_file: Location of a file containing a json object with base flag
            values (values in args will take precedence over these values).
    Returns:
        Exit code.
    """
    # Flag parsing failure is reported as a usage error:
    if not self._flags.parse(args, config_file):
        return os.EX_USAGE

    # NOTE(review): this method mixes self._flags and self.flags — presumably
    # self.flags is a property over self._flags; confirm against the class.
    if (self._help_flag == HELP_FLAG.ADD_HANDLE) and self.flags.help:
        usage_text = self.USAGE.strip() % {
            'this': '%s %s' % (base.get_program_name(), self.get_name()),
        }
        print(base.strip_margin(usage_text))
        print()
        self.flags.print_usage()
        return os.EX_OK

    return self.run(self._flags.get_unparsed())
def test_load_po():
    """load_po() should yield a TranslationItem for an entry with msgctxt."""
    po_text = strip_margin(
        """
        |# Some comment
        |#: body_default.txt:7
        |msgctxt "BODY:BASIC_1PARTBODY"
        |msgid "[BP:UB:body:bodies]"
        |msgstr "[BP:UB:тело:тела]"
        """
    )
    expected = TranslationItem(
        context="BODY:BASIC_1PARTBODY",
        text="[BP:UB:body:bodies]",
        translation="[BP:UB:тело:тела]",
        # translator_comment="Some comment\n",
        # source_file="body_default.txt",
        # line_number=7,
    )
    assert next(load_po(StringIO(po_text))) == expected
def link_python_binary(self, path, main_module, content_map):
    """Links a Python binary.

    The binary is an executable zip archive prefixed with a shebang line,
    containing a generated __main__.py plus the given content map.

    Args:
        path: Path of the executable file to produce.
        main_module: Name of the main module to invoke.
        content_map: Mapping of the files to include in the binary:
            python path -> content (bytes).
    Returns:
        The path of the executable produced.
    """
    os.makedirs(os.path.dirname(path), exist_ok=True)

    # Bootstrap script that runs the main module:
    main_py = base.strip_margin("""
        |#!/usr/bin/env python3.4
        |# -*- coding: utf-8; mode: python -*-
        |
        |import runpy
        |import sys
        |
        |module_name = "{module_name}"
        |runpy.run_module(module_name, run_name="__main__", alter_sys=False)
        |"""
    ).format(module_name=main_module)

    # Use context managers so both handles are closed even if a write fails
    # (the original leaked wfile/zf on any exception raised mid-write).
    with open(path, "wb") as wfile:
        wfile.write(b"#!/usr/bin/env python3.4\n")
        with zipfile.ZipFile(
            file=wfile, mode="w", compression=zipfile.ZIP_STORED) as zf:
            zf.writestr(zinfo_or_arcname="__main__.py", data=main_py)
            # Sort for deterministic archive layout:
            for python_path, content_bytes in sorted(content_map.items()):
                zf.writestr(zinfo_or_arcname=python_path, data=content_bytes)

    # Set execution permission:
    os.chmod(path=path, mode=0o755)
    return path
def test_po_reader():
    """PoReader should expose header metadata and iterate translation items."""
    po_text = strip_margin(
        r"""
        |msgid ""
        |msgstr ""
        |"Project-Id-Version: Dwarf Fortress\n"
        |"PO-Revision-Date: 2019-11-20 10:25+0000\n"
        |"Content-Type: text/plain; charset=UTF-8\n"
        |"Content-Transfer-Encoding: 8bit\n"
        |"Language: ru\n"
        |
        |# Some comment
        |#: body_default.txt:7
        |msgctxt "BODY:BASIC_1PARTBODY"
        |msgid "[BP:UB:body:bodies]"
        |msgstr "[BP:UB:тело:тела]"
        """
    )
    reader = PoReader(StringIO(po_text))

    expected_meta = {
        "Project-Id-Version": "Dwarf Fortress",
        "PO-Revision-Date": "2019-11-20 10:25+0000",
        "Content-Type": "text/plain; charset=UTF-8",
        "Content-Transfer-Encoding": "8bit",
        "Language": "ru",
    }
    assert reader.meta == expected_meta

    expected_item = TranslationItem(
        context="BODY:BASIC_1PARTBODY",
        text="[BP:UB:body:bodies]",
        translation="[BP:UB:тело:тела]",
        # translator_comment="Some comment\n",
        # source_file="body_default.txt",
        # line_number=7,
    )
    assert next(reader) == expected_item
def dump_state_as_table(self):
    """Dumps the running state of this workflow as a plain-text table.

    Returns:
        The running state of this workflow, formatted as a text table:
        per-state task counts followed by the task list for each state.
    """
    # Snapshot the task sets under the lock so formatting happens lock-free:
    with self._lock:
        successes = frozenset(self._success)
        failures = frozenset(self._failure)
        pending = frozenset(self._pending)
        running = frozenset(self._running)
        runnable = frozenset(self._runnable)

    def format_task(task):
        # Renders a task ID with whatever timing information is available.
        if task.start_time is None:
            return task.task_id
        elif task.end_time is None:
            # Still running: report elapsed time so far.
            return '%s (start time: %s - elapsed: %s)' % (
                task.task_id,
                base.timestamp(task.start_time.timestamp()),
                datetime.datetime.now() - task.start_time)
        else:
            return '%s (start time: %s - end time: %s - duration: %s)' % (
                task.task_id,
                base.timestamp(task.start_time.timestamp()),
                base.timestamp(task.end_time.timestamp()),
                task.end_time - task.start_time)

    successes = frozenset(map(format_task, successes))
    failures = frozenset(map(format_task, failures))
    pending = frozenset(map(format_task, pending))
    running = frozenset(map(format_task, running))
    runnable = frozenset(map(format_task, runnable))

    return base.strip_margin("""\
        |Running: %(nrunning)s
        |Runnable: %(nrunnable)s
        |Pending: %(npending)s
        |Successful: %(nsuccesses)s
        |Failed: %(nfailures)s
        |%(ruler)s
        |Running tasks:
        |%(running)s
        |%(ruler)s
        |Runnable tasks:
        |%(runnable)s
        |%(ruler)s
        |Pending tasks:
        |%(pending)s
        |%(ruler)s
        |Successful tasks:
        |%(successes)s
        |%(ruler)s
        |Failed tasks:
        |%(failures)s
        """) % dict(
            ruler = '-' * 80,
            nrunning = len(running),
            nrunnable = len(runnable),
            npending = len(pending),
            nsuccesses = len(successes),
            nfailures = len(failures),
            running = '\n'.join(map(lambda s: ' - %s' % s, sorted(running))),
            runnable = '\n'.join(map(lambda s: ' - %s' % s, sorted(runnable))),
            pending = '\n'.join(map(lambda s: ' - %s' % s, sorted(pending))),
            successes = '\n'.join(map(lambda s: ' - %s' % s, sorted(successes))),
            failures = '\n'.join(map(lambda s: ' - %s' % s, sorted(failures))),
        )
class Workflow(object):
    """Represents a graph of tasks with dependencies."""

    def __init__(self, name=None):
        """Initializes a new empty workflow.

        Args:
            name: Optional name for this workflow.
                Defaults to 'Workflow-<id>'.
        """
        if name is None:
            name = 'Workflow-%s' % id(self)
        self._name = name
        self._lock = threading.Lock()
        # Set once no more work is left (all tasks completed or failed):
        self._done = threading.Event()

        # Map: task ID -> Task
        # Becomes immutable after call to Build()
        self._tasks = dict()

        # Dependencies, as a set of Dependency objects:
        self._deps = set()

        # No new task may be added once the worker pool starts:
        self._started = False

        # A task belongs to exactly one of the following buckets:
        #  - running: task is currently running;
        #  - runnable: task may run, but no worker is available;
        #  - pending: task is blocked until all its dependencies are satisfied;
        #  - success or failure: task has completed.
        self._pending = set()
        self._runnable = set()
        self._running = set()
        self._success = set()
        self._failure = set()

        # Queue of runnable tasks to pick from:
        # This queue is updated to stay consistent with self._runnable:
        self._runnable_queue = queue.Queue()

    @property
    def name(self):
        """Returns: the name of this workflow."""
        return self._name

    def __str__(self):
        return "Workflow(name=%s)" % self._name

    def __repr__(self):
        return str(self)

    @property
    def tasks(self):
        """Returns: the map: task ID -> Task."""
        return self._tasks

    @property
    def deps(self):
        """Returns: the set of dependencies, as Dependency directed edges."""
        return self._deps

    @property
    def started(self):
        """Returns: whether the workflow is started."""
        return self._started

    @property
    def failed_tasks(self):
        """Set of tasks that failed, directly or transitively."""
        with self._lock:
            return frozenset(self._failure)

    @property
    def successful_tasks(self):
        """Set of tasks that completed successfully."""
        with self._lock:
            return frozenset(self._success)

    def GetTask(self, task_id):
        """Gets a task by ID.

        Args:
            task_id: ID of the task.
        Returns:
            The task with the specified ID.
        """
        return self._tasks[task_id]

    def _add_task(self, task):
        """Adds a new task to this workflow.

        Used by Task.__init__() to register new task objects.

        Args:
            task: New Task object to add.
        """
        assert not self._started
        assert (task.task_id not in self._tasks), \
            ('Duplicate task ID %r' % task.task_id)
        self._tasks[task.task_id] = task

    def AddDep(self, before, after):
        """Adds a dependency between two tasks.

        Args:
            before: Task or ID of the task that must run before the other.
            after: Task or ID of the task that must run after the other.
        """
        before_id = get_task_id(before)
        after_id = get_task_id(after)
        dep = Dependency(before=before_id, after=after_id)
        self._AddDep(dep)

    def _AddDep(self, dep):
        """Registers a Dependency.

        Args:
            dep: Dependency tuple.
        """
        if dep not in self._deps:
            self._deps.add(dep)
            # Mirror the edge onto the task descriptors, when the tasks
            # are already registered:
            before = self._tasks.get(dep.before)
            if before is not None:
                before._runs_before.add(dep.after)
            after = self._tasks.get(dep.after)
            if after is not None:
                after._runs_after.add(dep.before)

    def build(self):
        """Completes the workflow definition phase.

        Freezes the task map and dependency set, then validates the graph.
        """
        self._tasks = base.ImmutableDict(self._tasks)
        self._deps = frozenset(self._deps)

        # Freeze descriptors:
        for task in self._tasks.values():
            task._build()

        # Minimal validation:
        for task in self._tasks.values():
            for dep_id in task.runs_after:
                assert (dep_id in self._tasks), \
                    ('Task %r has dependency on unknown task %r'
                     % (task.task_id, dep_id))

        self._check_circular_deps()

    def _check_circular_deps(self):
        """Checks for circular dependencies.

        Raises:
            CircularDependencyError: if a dependency cycle exists.
        """
        # Set of task IDs that are completed:
        completed = set()
        # Set of tasks that are left:
        pending = set(self._tasks.values())
        # Repeatedly peel off tasks whose dependencies are all "completed";
        # if a pass removes nothing, the remainder must contain a cycle.
        while (len(pending) > 0):
            runnable = set()
            for task in pending:
                if completed.issuperset(task.runs_after):
                    runnable.add(task)
            if len(runnable) == 0:
                raise CircularDependencyError()
            pending.difference_update(runnable)
            completed.update(map(lambda task: task.task_id, runnable))

    def process(
        self,
        nworkers=1,
        monitor_thread=True,
        sync=True,
    ):
        """Processes the tasks from the pool.

        Args:
            nworkers: Number of workers to process tasks.
            monitor_thread: Whether to start a monitor thread.
            sync: Whether to wait for the workflow to complete.
        Returns:
            When synchronous, whether the workflow is successful.
            None otherwise.
        """
        assert not self._started
        self._started = True

        # Initializes runnable/pending task sets:
        for task in self._tasks.values():
            if task.is_runnable:
                self._runnable_queue.put(task)
                self._runnable.add(task)
            else:
                self._pending.add(task)

        # Log initial state of tasks:
        self._dump()

        # Short-circuit if workflow is empty:
        self._notify_if_done()
        if ((len(self._runnable) == 0) and (len(self._pending) == 0)):
            return

        # Starts workers:
        self._workers = list()
        for iworker in range(nworkers):
            worker_id = '%s-#%d' % (self._name, iworker)
            self._workers.append(Worker(worker_id=worker_id, task_queue=self))

        if monitor_thread:
            # NOTE: this rebinds self._monitor from the _monitor() method to
            # the Thread object; the Thread target captured the bound method
            # before the assignment, so both uses remain valid.
            self._monitor = threading.Thread(target=self._monitor)
            self._monitor.start()
        else:
            self._monitor = None

        if sync:
            return self.wait()
        else:
            return None

    def wait(self):
        """Waits for all the tasks to be processed.

        Returns:
            Whether the workflow is successful.
        """
        self._done.wait()

        # Notify all workers to exit:
        for _ in self._workers:
            self._runnable_queue.put(None)
        for worker in self._workers:
            worker.join()

        # Wait for monitor thread to exit:
        if self._monitor is not None:
            self._monitor.join()

        return (len(self.failed_tasks) == 0)

    def _monitor(self):
        """Monitoring thread to dump the state of the worker pool periodically."""
        # Wake up every 5 seconds until the workflow signals completion:
        while not self._done.wait(timeout=5.0):
            with self._lock:
                logging.debug(
                    'Running: %s',
                    ','.join(map(lambda task: task.task_id, self._running)))
        logging.debug('Monitor thread exiting')

    def pick(self):
        """Waits for and picks a runnable task.

        Returns:
            A runnable task if any, or None.
        """
        task = self._runnable_queue.get()
        if task is None:
            # Signal the worker should exit
            return None
        with self._lock:
            self._runnable.remove(task)
            self._running.add(task)
            return task

    def _report_task_complete(self, task):
        # Dispatches on the task's terminal state.
        if task.state == TaskState.SUCCESS:
            self._task_success(task)
        elif task.state == TaskState.FAILURE:
            self._task_failure(task)
        else:
            raise Error('Invalid task completion status: %r' % task.state)

    def _task_success(self, task):
        """Processes the success of a task.

        Args:
            task: ID of the task that completed successfully.
        """
        logging.debug('Task %r completed with success.', task.task_id)
        with self._lock:
            self._success.add(task)
            self._running.remove(task)

            # Identify tasks that were pending and now become runnable:
            new_runnable = set()
            for pending_id in task.runs_before:
                pending = self._tasks[pending_id]
                pending._task_success(task)
                if pending.is_runnable:
                    new_runnable.add(pending)

            # Update pending and runnable sets accordingly:
            self._pending.difference_update(new_runnable)
            self._runnable.update(new_runnable)
            for runnable_task in new_runnable:
                self._runnable_queue.put(runnable_task)

            self._dump()
            self._notify_if_done()

    def _task_failure(self, task):
        """Processes the failure of a task.

        Args:
            task: ID of the task that completed as a failure.
        """
        logging.debug('Task %r completed with failure.', task.task_id)

        def _FailRec(task, cause):
            """Recursively fails transitive dependencies.

            Args:
                task: Transitive dependency that fails.
                cause: Task that causes the dependency to fail.
            """
            logging.debug(
                'Task %r failed as a dependency of %r',
                task.task_id, cause.task_id)
            task._task_failure(cause)
            self._pending.discard(task)
            self._failure.add(task)
            for task_id in task.runs_before:
                _FailRec(task=self._tasks[task_id], cause=task)

        with self._lock:
            self._running.remove(task)
            self._failure.add(task)
            # Propagate the failure to all downstream tasks:
            for task_id in task.runs_before:
                _FailRec(task=self._tasks[task_id], cause=task)
            self._dump()
            self._notify_if_done()

    def _notify_if_done(self):
        """Tests whether there is more work to do.

        Assumes external synchronization.

        Raises:
            CircularDependencyError: if tasks are pending but nothing is
                running or runnable (the remainder can never make progress).
        """
        if ((len(self._pending) > 0)
                and ((len(self._running) + len(self._runnable)) == 0)):
            raise CircularDependencyError()
        if len(self._pending) > 0:
            return
        if len(self._runnable) > 0:
            return
        if len(self._running) > 0:
            return
        self._done.set()

    # Template to dump this workflow as a Graphviz/Dot definition:
    _DOT_TEMPLATE = base.strip_margin("""\
        |digraph Workflow {
        |%(nodes)s
        |%(deps)s
        |}""")

    def dump_as_dot(self):
        """Dumps this workflow as a Graphviz/Dot definition.

        Returns:
            A Graphviz/Dot definition for this workflow.
        """
        def make_node(task):
            return (' %s;' % base.make_ident(task.task_id))

        def make_dep(dep):
            return (' %s -> %s;'
                    % (base.make_ident(dep.after), base.make_ident(dep.before)))

        nodes = sorted(map(make_node, self._tasks.values()))
        deps = sorted(map(make_dep, self._deps))

        return self._DOT_TEMPLATE % dict(
            nodes='\n'.join(nodes),
            deps='\n'.join(deps),
        )

    @staticmethod
    def _get_task_label(task):
        # Default node-label function for dump_run_state_as_dot().
        return task.graphviz_label

    def dump_run_state_as_dot(self, make_task_label=None):
        """Dumps this workflow as a Graphviz/Dot definition.

        Args:
            make_task_label: Optional function: task -> task node label.
                Default is to use Task.MakeDotLabel().
        Returns:
            A Graphviz/Dot definition for this workflow.
        """
        if make_task_label is None:
            make_task_label = self._get_task_label

        def make_node(task):
            # Node colors encode the task state:
            # red = failed, green = success, yellow = running,
            # grey = runnable, white = pending.
            task_id = task.task_id
            if task.state == TaskState.FAILURE:
                color = "black"
                fillcolor = "red"
                fontcolor = "black"
            elif task.state == TaskState.SUCCESS:
                color = "black"
                fillcolor = "green"
                fontcolor = "white"
            elif task in self._running:
                color = "black"
                fillcolor = "yellow"
                fontcolor = "black"
            elif task in self._runnable:
                color = "black"
                fillcolor = "grey"
                fontcolor = "black"
            else:
                color = "black"
                fillcolor = "white"
                fontcolor = "black"
            label = make_task_label(task)
            return (
                """ %s [color="%s", fillcolor="%s", fontcolor="%s", style="filled", label="%s"];"""
                % (base.make_ident(task_id), color, fillcolor, fontcolor, label))

        # Map: source -> set of dependencies
        dep_map = dict()

        # Seed the dependency map with all tasks:
        for task in self._tasks.values():
            dep_map[base.make_ident(task.task_id)] = set()

        # Add dependencies:
        for dep in self._deps:
            dep_map[base.make_ident(dep.after)].add(base.make_ident(dep.before))

        dep_map = _minimize_dep_map(_maximize_dep_map(dep_map))

        deps = []
        for src_target, dest_deps in dep_map.items():
            for dest_dep in dest_deps:
                deps.append(' %s -> %s;' % (src_target, dest_dep))

        return self._DOT_TEMPLATE % dict(
            nodes='\n'.join(sorted(map(make_node, self._tasks.values()))),
            deps='\n'.join(sorted(deps)),
        )

    def dump_state_as_table(self):
        """Dumps the running state of this workflow as a plain-text table.

        Returns:
            The running state of this workflow, formatted as a text table:
            per-state task counts followed by the task list for each state.
        """
        # Snapshot the task sets under the lock; format outside the lock:
        with self._lock:
            successes = frozenset(self._success)
            failures = frozenset(self._failure)
            pending = frozenset(self._pending)
            running = frozenset(self._running)
            runnable = frozenset(self._runnable)

        def format_task(task):
            # Renders a task ID with whatever timing information is available.
            if task.start_time is None:
                return task.task_id
            elif task.end_time is None:
                return '%s (start time: %s - elapsed: %s)' % (
                    task.task_id,
                    base.timestamp(task.start_time.timestamp()),
                    datetime.datetime.now() - task.start_time)
            else:
                return '%s (start time: %s - end time: %s - duration: %s)' % (
                    task.task_id,
                    base.timestamp(task.start_time.timestamp()),
                    base.timestamp(task.end_time.timestamp()),
                    task.end_time - task.start_time)

        successes = frozenset(map(format_task, successes))
        failures = frozenset(map(format_task, failures))
        pending = frozenset(map(format_task, pending))
        running = frozenset(map(format_task, running))
        runnable = frozenset(map(format_task, runnable))

        return base.strip_margin("""\
            |Running: %(nrunning)s
            |Runnable: %(nrunnable)s
            |Pending: %(npending)s
            |Successful: %(nsuccesses)s
            |Failed: %(nfailures)s
            |%(ruler)s
            |Running tasks:
            |%(running)s
            |%(ruler)s
            |Runnable tasks:
            |%(runnable)s
            |%(ruler)s
            |Pending tasks:
            |%(pending)s
            |%(ruler)s
            |Successful tasks:
            |%(successes)s
            |%(ruler)s
            |Failed tasks:
            |%(failures)s
            """) % dict(
                ruler = '-' * 80,
                nrunning = len(running),
                nrunnable = len(runnable),
                npending = len(pending),
                nsuccesses = len(successes),
                nfailures = len(failures),
                running = '\n'.join(map(lambda s: ' - %s' % s, sorted(running))),
                runnable = '\n'.join(map(lambda s: ' - %s' % s, sorted(runnable))),
                pending = '\n'.join(map(lambda s: ' - %s' % s, sorted(pending))),
                successes = '\n'.join(map(lambda s: ' - %s' % s, sorted(successes))),
                failures = '\n'.join(map(lambda s: ' - %s' % s, sorted(failures))),
            )

    def _dump(self):
        # Verbose state dump; skipped unless logging at DEBUG_VERBOSE level.
        if (logging.getLogger().level > LOG_LEVEL.DEBUG_VERBOSE):
            return
        logging.debug(
            'Runnable:%s',
            ''.join(map(lambda task: '\n\t%s' % task, self._runnable)))
        logging.debug(
            'Pending:%s',
            ''.join(map(lambda task: '\n\t%s' % task, self._pending)))
        logging.debug(
            'Running:%s',
            ''.join(map(lambda task: '\n\t%s' % task, self._running)))

    def dump_as_svg(self):
        """Renders this workflow's run state as SVG via the Graphviz 'dot' tool.

        Returns:
            The SVG rendering, as a string.
        """
        dot_source = self.dump_run_state_as_dot()
        with tempfile.NamedTemporaryFile(suffix='.dot') as dot_file:
            with tempfile.NamedTemporaryFile(suffix='.svg') as svg_file:
                dot_file.write(dot_source.encode())
                dot_file.flush()
                cmd = command.Command(
                    args=['dot', '-Tsvg', '-o%s' % svg_file.name, dot_file.name],
                    exit_code=0,
                    wait_for=False,
                )
                # Allow 10s for Graphviz to complete, or kill it:
                try:
                    cmd.WaitFor(timeout=10.0)
                except TimeoutError:
                    cmd.Kill(sig=signal.SIGKILL)
                    raise
                return svg_file.read().decode()

    def prune(self, tasks, direction):
        """Prunes the workflow according to a sub-set of required tasks.

        Args:
            tasks: Collection of tasks to keep.
                Tasks that are not in this set or not required transitively
                through upstream/downstream dependencies of this set are
                discarded.
            direction: Either DOWNSTREAM or UPSTREAM.
        """
        assert not self._started

        # Exhaustive list of tasks to keep:
        if direction == UPSTREAM:
            tasks = get_upstream_tasks(flow=self, tasks=tasks)
        elif direction == DOWNSTREAM:
            tasks = get_downstream_tasks(flow=self, tasks=tasks)
        else:
            raise Error('Invalid filtering direction: %r' % direction)
        keep_ids = frozenset(map(lambda task: task.task_id, tasks))

        # IDs of the tasks to remove:
        remove_ids = set(self._tasks.keys())
        remove_ids.difference_update(keep_ids)
        for task_id in remove_ids:
            del self._tasks[task_id]

        # Filter dependencies:
        remove_deps = tuple(filter(
            lambda dep: (dep.before in remove_ids) or (dep.after in remove_ids),
            self._deps))
        self._deps.difference_update(remove_deps)

        # Update task descriptors:
        for task in self._tasks.values():
            task._runs_after.difference_update(remove_ids)
            task._runs_before.difference_update(remove_ids)
@pytest.mark.parametrize( "content, expected", [ ( strip_margin( """ |creature_birds - file title is ignored |[OBJECT:CREATURE] |[CREATURE:BIRD_BLUEJAY] - context will be changed to CREATURE:BIRD_BLUEJAY |[DESCRIPTION:A small blue-crested bird living in temperate woodlands, known for its harsh chirps.] |[NAME:blue jay:blue jays:blue jay] |[CASTE_NAME:blue jay:blue jays:blue jay] |[GENERAL_CHILD_NAME:blue jay hatchling:blue jay hatchlings] |[CREATURE_TILE:144][COLOR:1:0:1] |[BIOME:GRASSLAND_TEMPERATE] | |[CREATURE:BIRD_CARDINAL] - context will be changed to CREATURE:BIRD_CARDINAL |[DESCRIPTION:A small bright red bird with a distinctive crest, found in temperate forests.] |[NAME:cardinal:cardinals:cardinal] |[CASTE_NAME:cardinal:cardinals:cardinal] |[GENERAL_CHILD_NAME:cardinal hatchling:cardinal hatchlings] |[CREATURE_TILE:144][COLOR:4:0:1] |[PETVALUE:30][NATURAL][PET] """ ).strip(), [ TranslationItem( context="CREATURE:BIRD_BLUEJAY", text=( "[DESCRIPTION:"