def export_runs(self, results, exporters):
    '''export data retrieved to the set of exporters defined and active.
       Only plain text results are exported: a list that is saved as json
       or that starts with an existing path, and a string that is an
       existing path, are ignored for now.
       maybe an export flag could be set to choose to run + export?

       Parameters
       ==========
       results: a dictionary of results, with keys as the task name, and
                values as the result.
       exporters: an iterable of exporters. Each one is expected to provide
                  a name attribute and the _save_text_list and _save_text
                  functions.
    '''
    for name, result in results.items():

        task = self.get_task(name, save=True)

        # Case 1. The result is a list
        if isinstance(result, list):

            # Get rid of Nones, if the user accidentally added
            result = [r for r in result if r]

            if not result:
                bot.error('%s returned empty list of results.' % name)

            # for a json, or a list of paths, ignore for now.
            elif not (task.params.get('save_as') == 'json'
                      or os.path.exists(result[0])):
                for exporter in exporters:
                    bot.debug('Exporting list to ' + exporter.name)
                    exporter._save_text_list(name, result)

        # Case 2. The result is a string
        elif isinstance(result, str):

            # if it's a path to a file, ignore it.
            if not os.path.exists(result):

                # Every exporter receives the text. Previously "exporter"
                # was used here without a loop, so it was either undefined
                # or left over from an earlier list result.
                # NOTE(review): _save_text_list also receives the task
                # name, confirm if _save_text should be given it too.
                for exporter in exporters:
                    exporter._save_text(result)
def __init__(self, workers=None, show_progress=False):
    """Store the worker count and progress setting on the instance.

    Parameters
    ==========
    workers: the number of workers to use. When None, WATCHME_WORKERS
             is used instead.
    show_progress: saved as-is on the instance (show_progress).
    """
    self.show_progress = show_progress
    self.workers = WATCHME_WORKERS if workers is None else workers
    bot.debug("Using %s workers for multiprocess." % (self.workers))
def git_clone(repo, name=None, base=None, force=False):
    """clone a git repo to <base>/<name>. The repository is first cloned
    to a temporary directory and checked for a watchme.cfg, and only then
    moved to its destination. If the destination already exists, force
    must be True for it to be removed first.

    Parameters
    ==========
    repo: the repository to clone (handed to "git clone")
    name: the name of the watcher to add. Defaults to the last component
          of repo, without a trailing ".git"
    base: the base of the watcher (defaults to WATCHME_BASE_DIR)
    force: remove first if already exists
    """
    if base is None:
        base = WATCHME_BASE_DIR

    # Derive the repository name. Trailing separators are dropped first,
    # otherwise the basename is empty and the destination would be the
    # base itself. Only a ".git" suffix is removed, not every occurrence.
    if name is None:
        name = os.path.basename(repo.rstrip("/" + os.sep))
        if name.endswith(".git"):
            name = name[: -len(".git")]

    # First clone to temporary directory
    tmpdir = get_tmpdir()

    command = "git clone %s %s" % (repo, tmpdir)
    bot.debug(command)
    run_command(command)

    # ensure there is a watchme.cfg
    if not os.path.exists(os.path.join(tmpdir, "watchme.cfg")):
        shutil.rmtree(tmpdir)
        bot.exit("No watchme.cfg found in %s, aborting." % repo)

    # If it's good, move the repository
    dest = os.path.join(base, name)

    # Don't allow for overwrite
    if os.path.exists(dest):
        if force is False:
            shutil.rmtree(tmpdir)
            bot.exit("%s exists. Use --force to overwrite" % dest)
        else:
            shutil.rmtree(dest)

    # Move the repository there
    shutil.move(tmpdir, dest)

    # Ensure we don't sign gpg key
    run_command("git --git-dir=%s/.git config commit.gpgsign false" % dest)
    bot.info("Added watcher %s" % name)
def git_date(repo, commit):
    """look up the date of a commit with "git show -s --format=%ci".

    Parameters
    ==========
    repo: the full path to the repository. It is not part of the command
          that is built here.
    commit: the commit to get the date for

    Returns the command message with leading and trailing newlines removed
    when the return code is 0, and None otherwise.
    """
    command = "git show -s --format=%ci " + commit
    bot.debug(command)
    outcome = run_command(command)

    if outcome["return_code"] != 0:
        return None
    return outcome["message"].strip("\n")
def git_date(repo, commit):
    '''ask git for the date of one commit ("git show -s --format=%ci").

       Parameters
       ==========
       repo: the full path to the repository (the command built here does
             not include it)
       commit: the commit to get the date for

       The message of the command, stripped of newlines at both ends, is
       returned if the return code is 0. Otherwise None is returned.
    '''
    command = 'git show -s --format=%ci ' + commit
    bot.debug(command)

    response = run_command(command)
    succeeded = response['return_code'] == 0
    return response['message'].strip('\n') if succeeded else None
def git_add(repo, files):
    """run "git add" once for each of the files provided.

    Parameters
    ==========
    repo: the repository to commit to (not part of the command built here).
    files: a single file, or a list of files, to add.
    """
    targets = files if isinstance(files, list) else [files]

    for target in targets:
        command = "git add %s" % target
        bot.debug(command)
        run_command(command)
def git_commit(repo, task, message):
    """run "git commit -a" with a message that is prefixed with "watchme"
    and the task name.

    Parameters
    ==========
    repo: the repository to commit to (not part of the command built here).
    task: the name of the task to add to the commit message
    message: the message for the commit, passed from the client
    """
    # The final message is "watchme <task> <message>"
    full_message = "watchme %s %s" % (task, message)

    command = 'git commit -a -m "%s"' % full_message
    bot.debug(command)
    run_command(command)
def git_show(repo, commit, filename):
    """retrieve the content of a file at a particular commit, using
    "git show <commit>:<filename>". We must be in the $PWD of the repo
    for this to work.

    Parameters
    ==========
    repo: the repository to interact with (not part of the command)
    commit: the commit to investigate for the file
    filename: the relative path to the file

    Returns the command message with leading and trailing newlines removed
    when the return code is 0, and None otherwise.
    """
    command = "git show %s:%s" % (commit, filename)
    bot.debug(command)
    outcome = run_command(command)

    if outcome["return_code"] != 0:
        return None
    return outcome["message"].strip("\n")
def export_func(self):
    """select the task function (from the tasks.py in the same folder)
    that is named by the "func" parameter of self.params. The name
    defaults to "get_task", and None is returned for a name that is not
    known. This function takes no arguments, and only uses the self.params
    already provided in the client.
    """
    name = self.params.get("func", "get_task")
    func = None

    # Each function is only imported when it is the one asked for
    if name == "get_task":
        from .tasks import get_task

        func = get_task
    elif name == "download_task":
        from .tasks import download_task

        func = download_task
    elif name == "post_task":
        from .tasks import post_task

        func = post_task
    elif name == "get_url_selection":
        from .tasks import get_url_selection

        func = get_url_selection

    bot.debug("function name is %s" % name)
    return func
def monitor_pid_task(**kwargs):
    '''monitor a specific process. This function can be used as a task, or
       is (most likely used) for the psutils.decorators. A pid parameter
       is required.

       Parameters
       ==========
       skip: an optional list of (comma separated) fields to skip. Can be in
             net_io_counters,net_connections,net_if_address,net_if_stats
       include: an optional list of (comma separated) fields to include
                that are left out otherwise. Only "environ" is checked
                against it.
       only: an optional list of (comma separated) fields. If provided,
             only these fields are returned, and skip and include are
             not considered.
       pid: the process id or name (required)

       Returns
       =======
       a dictionary of fields for the process, updated with the result of
       get_watchme_env(). None is returned if the pid is missing, or does
       not correspond to a running process.
    '''
    pid = kwargs.get('pid', None)

    bot.debug(kwargs)

    # Helper function to get list from one,two,three
    def get_list(name):
        csv_list = kwargs.get(name, '')
        return [x for x in csv_list.split(',') if x]

    # A comma separated list of parameters to skip
    skip = get_list('skip')
    include = get_list('include')
    only = get_list('only')

    # Only continue given that argument is provided
    if pid is None:
        bot.warning(
            "A 'pid' parameter is required to use the monitor_pid_task function."
        )
        return pid

    # The user is allowed to provide a process name, or a number
    try:
        pid = int(pid)
    except ValueError:
        pid = _get_pid(pid)

    # But if it's still no good (None) we exit.
    if pid is None:
        bot.warning("'pid' must be a running process or process name.")
        return pid

    # A numeric pid might not (or no longer) belong to a running process,
    # in which case psutil raises NoSuchProcess. Treat that the same as a
    # process name that could not be found.
    try:
        ps = psutil.Process(pid)
        bot.debug(ps)
        fields = ps.as_dict()
    except psutil.NoSuchProcess:
        bot.warning("'pid' must be a running process or process name.")
        return None

    results = {}

    # NOTE(review): the checks below reference psutil._pslinux, confirm
    # that this task is only expected to run on Linux.
    for key, val in fields.items():

        # If val is None, don't include
        if val is None:
            bot.debug('skipping %s, None' % key)
            continue

        # Connections are flattened into a list of plain dictionaries
        if key == "connections":
            connections = []
            for net in val:
                entry = {
                    'fd': net.fd,
                    'family': str(net.family),
                    'type': str(net.type),
                    'laddr_ip': net.laddr.ip,
                    'laddr_port': net.laddr.port,
                    'raddr': net.raddr,
                    'status': net.status
                }
                connections.append(entry)
            val = connections

        # First priority goes to a custom set
        if len(only) > 0:
            if key in only:
                results[key] = val
            else:
                bot.debug('skipping %s' % key)
            continue

        # The user requested to skip
        elif key in skip or key in ['threads']:
            continue

        # Keep count of openfiles
        elif key in ["open_files"]:
            results[key] = len(val)

        # Don't risk exposing sensitive information
        elif key == "environ":
            if key in include:
                results[key] = val

        # Skip over ones that are too detailed
        elif key in ['memory_maps']:
            continue

        # I assume this included all that are in pmem too
        elif isinstance(val, psutil._pslinux.pfullmem):
            results[key] = {
                "rss": val.rss,
                "vms": val.vms,
                "shared": val.shared,
                "text": val.text,
                "lib": val.lib,
                "data": val.data,
                "dirty": val.dirty,
                "uss": val.uss,
                "pss": val.pss,
                "swap": val.swap
            }

        elif isinstance(val, psutil._common.pgids) or isinstance(
                val, psutil._common.puids):
            results[key] = {
                "real": val.real,
                "effective": val.effective,
                # Misspelled key from earlier versions, kept so that
                # anything reading previously saved results still works.
                "effetive": val.effective,
                "saved": val.saved
            }

        elif isinstance(val, psutil._common.pcputimes):
            results[key] = {
                "user": val.user,
                "system": val.system,
                "children_user": val.children_user,
                "children_system": val.children_system
            }

        elif isinstance(val, psutil._common.pctxsw):
            results[key] = {
                "voluntary": val.voluntary,
                "involuntary": val.involuntary
            }

        elif isinstance(val, psutil._common.pionice):
            results[key] = {"value": val.value}

            # Older Python version (2) doesn't have attribute
            if hasattr(val.ioclass, 'name'):
                results[key]["ioclass"] = val.ioclass.name

        # pfullmem (first above) should cover this
        elif isinstance(val, psutil._pslinux.pmem):
            continue

        elif isinstance(val, psutil._pslinux.pio):
            results[key] = {
                "read_count": val.read_count,
                "write_count": val.write_count,
                "read_bytes": val.read_bytes,
                "write_bytes": val.write_bytes,
                "read_chars": val.read_chars,
                "write_chars": val.write_chars
            }
        else:
            results[key] = val

    # Add any environment variables prefixed with WATCHMEENV_
    environ = get_watchme_env()
    results.update(environ)

    return results
def end(self):
    """Record the end time, and the runtime in seconds since start_time.

    start_time must already be set on the instance (the start function
    sets it), otherwise an AttributeError is raised.
    """
    self.end_time = time.time()

    # Assigned once; the runtime used to be assigned twice in a chain.
    self.runtime = self.end_time - self.start_time
    bot.debug("Ending multiprocess, runtime: %s sec" % (self.runtime))
def start(self):
    """Log that the multiprocess run is starting, and save the current
    time on the instance as start_time.
    """
    bot.debug("Starting multiprocess")
    self.start_time = time.time()
def write_results(self, result, repo):
    """an entrypoint function for a general task. By default, we parse
    results based on the result type. Any particular subclass of the
    TaskBase can modify or extend these functions.

    Parameters
    ==========
    result: the result object to parse
    repo: the repo base (watcher.repo)

    Returns
    =======
    a list of the saved files, with empty (None) entries removed. If the
    result type is not handled here and the task defines _write_results,
    the return value of that function is returned as-is instead.
    """
    files = []

    # Case 1. The result is a list
    if isinstance(result, list):

        # Get rid of Nones, if the user accidentally added
        result = [r for r in result if r]

        if not result:
            bot.error("%s returned empty list of results." % self.name)

        # multiple jsons save specified, regardless
        elif self.params.get("save_as") == "jsons":
            bot.debug("Saving single list as multiple json...")
            files += self._save_json_list(result, repo)

        # json output is specified by the user or we find dict results
        elif self.params.get("save_as") == "json" or isinstance(
                result[0], dict):
            bot.debug("Saving single list as one json...")
            files.append(self._save_json(result, repo))

        # Otherwise, sniff for list of paths
        elif os.path.exists(result[0]):
            bot.debug("Found list of paths...")
            files += self._save_files_list(result, repo)

        # Finally, assume just writing text to file
        else:
            bot.debug("Saving content from list to file...")
            files += self._save_text_list(result, repo)

    # Case 2. The result is a string
    elif isinstance(result, str):
        files = self._save_str_result(files, result, repo)

    # Case 3. The result is a dictionary
    elif isinstance(result, dict):
        files.append(self._save_json(result, repo))

    elif result is None:
        bot.error("Result for task %s is None" % self.name)

    elif hasattr(self, "_write_results"):
        return self._write_results(result)

    # If it's unicode, try encoding, and then fail (repetitive)
    else:
        # Any failure here is still reported as an unsupported format, but
        # KeyboardInterrupt and SystemExit are no longer swallowed, as
        # they were by the bare except.
        try:
            result = result.encode("utf-8")
            files = self._save_str_result(files, result, repo)
        except Exception:
            bot.error("Unsupported result format %s" % type(result))

    # Get rid of None results (don't check excessively for None above)
    files = [f for f in files if f]
    return files
def finish_runs(self, results):
    '''finish runs should take a dictionary of results, with keys as the
       folder name, and for each, depending on the result type,
       write the result to file (or update file) and then commit
       to git.

       Parameters
       ==========
       results: a dictionary of tasks, with keys as the task name, and
                values as the result.
    '''
    for name, result in results.items():
        task_folder = os.path.join(self.repo, name)
        task = self.get_task(name, save=True)

        # Files to be added via Git after
        files = []

        # Ensure that the task folder exists
        if not os.path.exists(task_folder):
            mkdir_p(task_folder)
            git_add(self.repo, task_folder)

        # Case 1. The result is a list
        if isinstance(result, list):

            # Get rid of Nones, if the user accidentally added
            result = [r for r in result if r]

            if not result:
                bot.error('%s returned empty list of results.' % name)

            # json output is specified
            elif task.params.get('save_as') == 'json':
                bot.debug('Saving single list as one json...')
                files.append(task._save_json(result, self.repo))

            # multiple json output is specified. This used to test for
            # 'json' a second time, so it could never be reached.
            elif task.params.get('save_as') == 'jsons':
                bot.debug('Saving single list as multiple json...')
                files += task._save_json_list(result, self.repo)

            # Otherwise, sniff for list of paths
            elif os.path.exists(result[0]):
                bot.debug('Found list of paths...')
                files += task._save_files_list(result, self.repo)

            # Finally, assume just writing text to file
            else:
                bot.debug('Saving content from list to file...')
                files += task._save_text_list(result, self.repo)

        # Case 2. The result is a string
        elif isinstance(result, str):

            # if it's a path to a file, just save to repository
            if os.path.exists(result):
                files.append(task._save_file(result, self.repo))

            # Otherwise, it's a string that needs to be saved to file
            else:
                files.append(task._save_text(result, self.repo))

        # Case 3. The result is a dictionary
        elif isinstance(result, dict):
            files.append(task._save_json(result, self.repo))

        elif result is None:
            bot.error('Result for task %s is None' % name)

        else:
            bot.error('Unsupported result format %s' % type(result))

        # Get rid of None results (don't check excessively for None above)
        files = [f for f in files if f]

        # Add files to git, and commit
        files.append(write_timestamp(repo=self.repo, task=name))
        git_add(repo=self.repo, files=files)
        git_commit(repo=self.repo,
                   task=self.name,
                   message="ADD results %s" % name)
def write_results(self, result, repo):
    '''an entrypoint function for a general task. By default, we parse
       results based on the result type. Any particular subclass of the
       TaskBase can modify or extend these functions.

       Parameters
       ==========
       result: the result object to parse
       repo: the repo base (watcher.repo)

       Returns
       =======
       a list of the saved files, with empty (None) entries removed. If
       the result type is not handled here and the task defines
       _write_results, the return value of that function is returned
       as-is instead.
    '''
    files = []

    # Case 1. The result is a list
    if isinstance(result, list):

        # Get rid of Nones, if the user accidentally added
        result = [r for r in result if r]

        if not result:
            bot.error('%s returned empty list of results.' % self.name)

        # multiple jsons save specified, regardless
        elif self.params.get('save_as') == 'jsons':
            bot.debug('Saving single list as multiple json...')
            files += self._save_json_list(result, repo)

        # json output is specified by the user or we find dict results
        elif self.params.get('save_as') == 'json' or isinstance(
                result[0], dict):
            bot.debug('Saving single list as one json...')
            files.append(self._save_json(result, repo))

        # Otherwise, sniff for list of paths
        elif os.path.exists(result[0]):
            bot.debug('Found list of paths...')
            files += self._save_files_list(result, repo)

        # Finally, assume just writing text to file
        else:
            bot.debug('Saving content from list to file...')
            files += self._save_text_list(result, repo)

    # Case 2. The result is a string
    elif isinstance(result, str):

        # if it's a path to a file, just save to repository
        if os.path.exists(result):
            files.append(self._save_file(result, repo))

        # Otherwise, it's a string that needs to be saved to file
        else:
            files.append(self._save_text(result, repo))

    # Case 3. The result is a dictionary
    elif isinstance(result, dict):
        files.append(self._save_json(result, repo))

    # An identity check, so that an object that only compares equal to
    # None is not reported as a missing result.
    elif result is None:
        bot.error('Result for task %s is None' % self.name)

    elif hasattr(self, '_write_results'):
        return self._write_results(result)

    else:
        bot.error('Unsupported result format %s' % type(result))

    # Get rid of None results (don't check excessively for None above)
    files = [f for f in files if f]
    return files