def _exporter_selected(self, exporter, regexp=None): '''check if an exporter is active and (if defined) passes user provided exporter names or regular expressions. Parameters ========== exporter: the exporter object to check regexp: an optional regular expression (or name) to check ''' selected = True # A exporter can be None if it wasn't found if exporter == None: selected = False # Is the exporter not active (undefined is active)? active = exporter.params.get('active', 'true') if active == "false": bot.info('Exporter %s is not active.' % exporter) selected = False # The user wants to search for a custom task name if regexp != None: if not re.search(regexp, exporter): bot.info('Exporter %s is selected for data export.' % exporter) selected = False return selected
def get_tasks(self, regexp=None, quiet=False, active=True):
    '''collect the tasks of a watcher, optionally filtered by a regular
       expression. Every section of the loaded configuration is handed to
       get_task, and a task is kept when it is not None, passes
       _task_selected, and reports itself as valid.

       Parameters
       ==========
       regexp: if supplied, only tasks matching this pattern are kept
       quiet: if True, don't print the number of tasks found
       active: passed on to _task_selected (default True)

       Returns
       =======
       a list with the selected task objects, in section order.
    '''
    self.load_config()

    # Lazily look up one task per section, preserving the section order
    candidates = (self.get_task(name) for name in self.config._sections)

    tasks = [
        candidate
        for candidate in candidates
        if candidate is not None
        and self._task_selected(candidate, regexp, active)
        and candidate.valid
    ]

    if not quiet:
        bot.info('Found %s contender tasks.' % len(tasks))
    return tasks
def get_exporters(self, regexp=None):
    '''get the exporters for a watcher, possibly matching a regular
       expression. Every section of the loaded configuration is handed to
       get_exporter, and an exporter is kept when it is not None, passes
       _exporter_selected, and reports itself as valid.

       Parameters
       ==========
       regexp: if supplied, the user wants to export to destinations
               that only match the expression specified.

       Returns
       =======
       a list with the selected exporter objects, in section order.
    '''
    self.load_config()

    exporters = []
    for section in self.config._sections:

        # Get the exporter based on the section name
        exporter = self.get_exporter(section)

        # Sections that don't resolve to an exporter are skipped
        if exporter is None:
            continue

        # Check that the exporter should be used, and is valid
        if self._exporter_selected(exporter, regexp) and exporter.valid:
            exporters.append(exporter)

    bot.info('Found %s contender exporters.' % len(exporters))
    return exporters
def generate_watcher_config(path, watcher_type=None):
    '''generate a watcher config, meaning a watchme.cfg inside the watcher
       folder. If the file doesn't exist yet it is copied from the template,
       and in either case the watcher type is written into its "watcher"
       section.

       Parameters
       ==========
       path: the path to the watcher repository
       watcher_type: the type to record, defaults to WATCHME_DEFAULT_TYPE
    '''
    check_exists(path)
    configfile = get_configfile_template()
    watcher_config = os.path.join(path, 'watchme.cfg')
    if not os.path.exists(watcher_config):
        bot.info('Generating watcher config %s' % watcher_config)
        shutil.copyfile(configfile, watcher_config)

    # Complete generation includes the watcher type
    if watcher_type is None:
        watcher_type = WATCHME_DEFAULT_TYPE

    # Read the watcher's own file (a fresh copy of the template, or a file
    # that was already there) so that existing settings are not replaced
    # by the template content when we write back below.
    config = read_config(watcher_config)

    # The template config has the section, but just in case
    if 'watcher' not in config.sections():
        config.add_section('watcher')
    config['watcher']['type'] = watcher_type

    # Save to file
    write_config(watcher_config, config)
def _task_selected(self, task, regexp=None, active=True): '''check if a task is active and (if defined) passes user provided task names or regular expressions. Parameters ========== task: the task object to check regexp: an optional regular expression (or name) to check active: a task is selected if it's active (default True) ''' selected = True # A task can be None if it wasn't found if task is None: selected = False # Is the task not active (undefined is active)? is_active = task.params.get('active', 'true') if is_active == "false" and active: bot.info('Task %s is not active.' % task) selected = False # The user wants to search for a custom task name if regexp is not None and task is not None: if not re.search(regexp, task.name): bot.info('Task %s is not selected to run.' % task) selected = False return selected
def create_watcher(name=None, watcher_type=None, base=None, exporter=None):
    '''create a watcher, meaning a folder with a configuration and
       initialized git repo.

       Parameters
       ==========
       name: the watcher to create, uses default or WATCHME_WATCHER
       watcher_type: the type of watcher to create, handed on to
                     generate_watcher_config
       base: the watcher base, defaults to WATCHME_BASE_DIR
       exporter: handed on to generate_watcher_config

       Returns
       =======
       the path of the new watcher repository, or None when the folder
       already existed (nothing is changed in that case).
    '''
    if name is None:
        name = WATCHME_WATCHER

    if base is None:
        base = WATCHME_BASE_DIR

    # The repository folder lives directly under the base
    repo = os.path.join(base, name)

    if os.path.exists(repo):
        bot.info('%s already exists: %s' % (name, repo))
        return None

    bot.info('Adding watcher %s...' % repo)
    mkdir_p(repo)

    # Ensure no gpg signing happens
    run_command("git --git-dir=%s/.git init" % repo)
    run_command("git --git-dir=%s/.git config commit.gpgsign false" % repo)

    # Add the watcher configuration file
    # NOTE(review): exporter is passed as a third positional argument,
    # confirm that generate_watcher_config accepts it.
    generate_watcher_config(repo, watcher_type, exporter)
    run_command("git -C %s add watchme.cfg" % repo)
    return repo
def git_clone(repo, name=None, base=None, force=False):
    """clone a git repo into the watchme base. The repository is first
    cloned to a temporary directory, checked for a watchme.cfg, and then
    moved to <base>/<name>.

    base without name: the repo is cloned (named as it is) to the base.
    If the folder exists, --force must be used to remove it first.
    base with name: the repo is cloned (and named based on the name
    variable) to the base. The same applies for force.

    Parameters
    ==========
    repo: the repository (url or path) handed to "git clone"
    name: the name of the watcher to add, derived from repo if None
    base: the base of the watcher (defaults to WATCHME_BASE_DIR)
    force: remove first if already exists
    """
    if base is None:
        base = WATCHME_BASE_DIR

    # Derive the repository name from the last path component. A trailing
    # slash is ignored, and only a trailing ".git" is removed so that names
    # which merely contain ".git" (e.g. "my.github-watcher") stay intact.
    if name is None:
        name = os.path.basename(repo.rstrip("/"))
        if name.endswith(".git"):
            name = name[: -len(".git")]

    # First clone to temporary directory
    tmpdir = get_tmpdir()
    command = "git clone %s %s" % (repo, tmpdir)
    bot.debug(command)
    run_command(command)

    # ensure there is a watchme.cfg
    if not os.path.exists(os.path.join(tmpdir, "watchme.cfg")):
        shutil.rmtree(tmpdir)
        bot.exit("No watchme.cfg found in %s, aborting." % repo)

    # If it's good, move the repository
    dest = os.path.join(base, name)

    # Don't allow for overwrite
    if os.path.exists(dest):
        if force is False:
            shutil.rmtree(tmpdir)
            bot.exit("%s exists. Use --force to overwrite" % dest)
        else:
            shutil.rmtree(dest)

    # Move the repository there
    shutil.move(tmpdir, dest)

    # Ensure we don't sign gpg key
    run_command("git --git-dir=%s/.git config commit.gpgsign false" % dest)
    bot.info("Added watcher %s" % name)
def clear_schedule(self):
    '''clear the cron jobs of all watchers, meaning every job whose comment
       starts with "watchme-". To remove jobs associated with a single
       watcher, use remove_schedule.

       Returns
       =======
       the crontab object, after it was written back.
    '''
    import re

    cron = self.get_crontab()
    bot.info('Clearing jobs associated with all watchers')

    # A plain string is compared for equality with the job comment, so a
    # glob like "watchme-*" would match nothing. A compiled pattern is
    # searched in each comment instead.
    cron.remove_all(comment=re.compile('^watchme-'))

    # Save new cron
    cron.write_to_user(user=True)
    return cron
def _get_config(name, exporter):
    '''shared function to return a file in the config templates directory.
       When an exporter is given and <templates>/<exporter>/<name> exists,
       that file is returned, otherwise the lookup falls back to
       <templates>/<name>.

       Parameters
       ==========
       name: the file name of the template
       exporter: the exporter (sub folder) to look in, may be None or empty

       Returns
       =======
       the absolute path of the template file
    '''
    templates = os.path.join(get_installdir(), 'config', 'templates')
    default_path = os.path.abspath(os.path.join(templates, name))

    # No exporter requested: the default template is the answer, and there
    # is no missing exporter to report
    if not exporter:
        return default_path

    template_path = os.path.join(templates, exporter, name)
    if os.path.exists(template_path):
        return os.path.abspath(template_path)

    bot.info(
        'The exporter specified does not exist : %s. The task was created with no exporters. '
        % exporter)
    return default_path
def create_watcher_base(name=None, base=None):
    """create the watcher base folder, if it doesn't already exist.

    Parameters
    ==========
    name: not used by this function, kept so existing callers that pass
          a watcher name keep working
    base: the watcher base, defaults to WATCHME_BASE_DIR
    """
    if base is None:
        base = WATCHME_BASE_DIR

    if not os.path.exists(base):
        bot.info("Creating %s..." % base)
        mkdir_p(base)
def list_watcher(watcher, base=None):
    '''list the contents (tasks) of a single watcher.

       Parameters
       ==========
       watcher: the name of the watcher folder, relative to the base
       base: the watchme base, defaults to WATCHME_BASE_DIR
    '''
    if base is None:
        base = WATCHME_BASE_DIR

    repo = os.path.join(base, watcher)
    if os.path.exists(repo):
        files = os.listdir(repo)
        bot.custom(prefix="task:", message="%s" % repo, color="CYAN")
        bot.info('\n '.join(files))
    else:
        # Report the path that was actually checked, the base may well exist
        bot.exit('%s does not exist.' % repo)
def print_section(self, section):
    '''show every key and value of one section (usually a task) of the
       configuration file, or report that the section is not valid.

       Parameters
       ==========
       section: the name of the section (task)
    '''
    self.load_config()

    # Unknown sections are reported, there is nothing to print
    if section not in self.config:
        bot.exit('%s is not a valid section.' % section)
        return

    bot.info('[%s]' % section)
    entries = self.config[section]
    for key in entries:
        bot.custom(prefix=key, message=" = %s" % entries[key], color="CYAN")
def print_add_task(self, task):
    '''print the command that could be used to create/add a task, built
       from the key@value pairs of its configuration section.

       Parameters
       ==========
       task: the name of the task to inspect
    '''
    self.load_config()

    # Unknown tasks are reported, there is no command to assemble
    if task not in self.config:
        bot.exit('%s is not a valid task.' % task)
        return

    params = self.config[task]
    pairs = ["%s@%s" % (key, params[key]) for key in params]
    bot.info(" ".join(["watchme add %s" % task] + pairs))
def get_watchers(base=None, quiet=False):
    '''list the watchers installed at a base. If base is not defined,
       the default base is used.

       Parameters
       ==========
       base: the watchme base, defaults to WATCHME_BASE_DIR
       quiet: if True, don't print the watcher names

       Returns
       =======
       the entries of the base folder as returned by os.listdir. When the
       base does not exist this is reported with bot.exit.
    '''
    if base is None:
        base = WATCHME_BASE_DIR

    if os.path.exists(base):
        watchers = os.listdir(base)
        if not quiet:
            bot.info('\n'.join(watchers))
        return watchers
    else:
        bot.exit('%s does not exist.' % base)
def get_watchers(base=None, quiet=False):
    """return (and unless quiet, print) the entries found in a watchme
    base folder. A missing base folder is reported with bot.exit.

    Parameters
    ==========
    base: the watchme base, the default base is used if None
    quiet: if True, the names are not printed
    """
    base = WATCHME_BASE_DIR if base is None else base

    # Nothing to list without the base folder
    if not os.path.exists(base):
        bot.exit("%s does not exist." % base)
        return None

    found = os.listdir(base)
    if not quiet:
        bot.info("\n".join(found))
    return found
def _general_list(path, prefix="path", base=None):
    """a shared function for listing (printing) the files in a folder.
    Nothing is returned.

    Parameters
    ==========
    path: the full path to list, if it exists
    prefix: a prefix to print for the type
    base: not used for the listing, kept so existing callers that pass
          it keep working
    """
    if os.path.exists(path):
        files = os.listdir(path)
        bot.custom(prefix="%s:" % prefix, message="%s" % path, color="CYAN")
        bot.info("\n ".join(files))
    else:
        # Report the path that was actually checked, not the base
        bot.exit("%s does not exist." % path)
def delete(self):
    '''remove the folder that holds the watcher configuration file, and
       with it the entire watcher. A frozen or protected watcher is
       reported with bot.exit first. Cannot be undone.
    '''
    self.load_config()

    # Check for protection
    if self.is_frozen():
        bot.exit('watcher %s is frozen, unfreeze to delete.' % self.name)
    elif self.is_protected():
        bot.exit('watcher %s is protected, turn off protection to delete.' % self.name)

    watcher_dir = os.path.dirname(self.configfile)

    # Ensure repository exists before delete
    if not os.path.exists(watcher_dir):
        bot.exit("%s:%s doesn't exist" % (self.name, watcher_dir))
        return

    bot.info('Removing watcher %s' % self.name)
    shutil.rmtree(watcher_dir)
def remove_schedule(self, name=None, quiet=False):
    '''remove a scheduled item from crontab, this is based on the watcher
       name. By default, we use the watcher instance name, however you
       can specify a custom name if desired.

       Parameters
       ==========
       name: the watcher name the cron comment is built from, defaults
             to self.name
       quiet: if True, don't print a message when a schedule was removed

       Returns
       =======
       the crontab object. It is only written back when a job was removed.
    '''
    if name is None:
        name = self.name

    cron = self.get_crontab()

    # Build the comment from the requested name, so a custom name is honored
    comment = 'watchme-%s' % name

    # Collect the matches first, so jobs are not removed while searching
    jobs = list(cron.find_comment(comment))
    for job in jobs:
        cron.remove(job)

    if jobs:
        if not quiet:
            bot.info('Removed schedule for watcher %s' % name)
        cron.write_to_user(user=True)

    return cron
def get_commits(repo, from_commit=None, to_commit=None, grep=None, filename=None):
    """get commits, starting from and going to a particular commit. If grep
    is defined, filter commits to those with messages that match that
    particular expression.

    Parameters
    ==========
    repo: the repository to get commits for
          NOTE(review): not used when building the command, the git log
          runs wherever run_command executes it. Confirm against callers.
    from_commit: the commit to start at, defaults to get_earliest_commit()
    to_commit: the commit to go to, defaults to get_latest_commit()
    grep: the expression to match (not used if None)
    filename: the filename to filter to. Includes all files if not specified.

    Returns
    =======
    a list with the non-empty lines of the command output
    """
    command = 'git log --all --oneline --pretty=tformat:"%H"'

    # The earliest commit
    if from_commit is None:
        from_commit = get_earliest_commit()

    # The latest commit
    if to_commit is None:
        to_commit = get_latest_commit()

    # A regular expression to search for (and filter commits). Use the
    # expression the caller asked for instead of a fixed one.
    if grep is not None:
        command = '%s --grep "%s"' % (command, grep)

    # Add the commit range
    command = "%s %s..%s" % (command, from_commit, to_commit)

    if filename is not None:
        command = "%s -- %s" % (command, filename)

    bot.info(command)
    results = run_command(command)["message"]
    return [x for x in results.split("\n") if x]
def run_tasks(self, queue, parallel=True, show_progress=True):
    '''run a list of task objects. With parallel set to True the queue is
       handed to _run_parallel and its result is returned. Otherwise each
       task is run in order with task.run(), and the results are collected
       in a dictionary keyed by the task name.

       Parameters
       ==========
       queue: the list of task objects to run
       parallel: if True (default) delegate to _run_parallel
       show_progress: if True show a progress bar, otherwise print one
                      line per task

       Examples
       ========
       funcs
       {'task-reddit-hpc': <function watchme.watchers.urls.tasks.get_task>}
       tasks
       {'task-reddit-hpc': [('url', 'https://www.reddit.com/r/hpc'),
                            ('active', 'true'), ('type', 'urls')]}
    '''
    if parallel is True:
        return self._run_parallel(queue, show_progress)

    # Serial run: count from one for the progress display
    total = len(queue)
    outcomes = {}

    for position, task in enumerate(queue, start=1):
        label = "[%s:%s/%s]" % (task.name, position, total)
        if show_progress is True:
            bot.show_progress(position, total, length=35, prefix=label)
        else:
            bot.info('Running %s' % label)
        outcomes[task.name] = task.run()

    return outcomes
def remove_task(self, task):
    '''delete a task section from the watcher configuration, remove its
       folder in the watcher repo when there is one, and commit the
       removal. A task without a section only produces a warning, and a
       frozen watcher is reported with bot.exit.

       Parameters
       ==========
       task: the name of the task to remove
    '''
    # Without a section there is nothing to remove
    if self.get_section(task) is None:
        bot.warning('Task %s does not exist.' % task)
        return

    if self.is_frozen():
        bot.exit('watcher is frozen, unfreeze first.')

    self.remove_section(task)

    # If the task has a folder, remove the entire thing
    task_folder = os.path.join(self.repo, task)
    if os.path.exists(task_folder):
        shutil.rmtree(task_folder)

    bot.info('%s removed successfully.' % task)
    git_commit(self.repo, self.name, "REMOVE task %s" % task)
def main(args, extra):
    """export the temporal data of one task file of a watcher, printed as
    json or written to the output file given with args.out.

    Parameters
    ==========
    args: the parsed arguments, read for watcher, task, filename, out,
          base, force and json
    extra: not used here
    """
    # Required - will print help if not provided
    name, task, filename = args.watcher[0], args.task[0], args.filename[0]

    if not task.startswith(("task", "decorator")):
        example = "watchme export watcher task-reddit result.txt"
        bot.exit('Task name must start with "task" or "decorator": %s' % example)

    # Use the output file, or print to the terminal
    out = args.out

    # Get the watcher to interact with, must already exist
    watcher = get_watcher(name, base=args.base, create=False)

    # An existing output file is only replaced on request
    if out is not None and os.path.exists(out) and args.force is False:
        bot.exit("%s exists! Use --force to overwrite." % out)

    # Export the data
    result = watcher.export_dict(
        task=task,
        filename=filename,
        name=name,
        export_json=args.json,
        base=args.base,
    )

    if result is None:
        return

    if out is None:
        print(json.dumps(result, indent=4))
        return

    write_json(result, out)
    bot.info("Result written to %s" % out)
def _active_status(self, status='true', name=None):
    '''a general function to change the status, used by activate and
       deactivate. The "active" setting of the section is updated and saved.

       Parameters
       ==========
       status: must be one of true, false
       name: if not None, we are changing a task (not the watcher)

       Returns
       =======
       the message for the commit: "ACTIVE" or "DEACTIVATE", followed by
       "task <name>" when a task name was given.
    '''
    # Load the configuration, if not loaded
    self.load_config()

    # Remember whether a task was asked for before falling back to the
    # watcher section, so the watcher itself is not reported as a task
    is_task = name is not None
    section = name if is_task else 'watcher'

    # Cut out early if section not in config
    if section not in self.config._sections:
        bot.exit('%s is not a valid task or section' % section)

    if status not in ['true', 'false']:
        bot.exit('status must be true or false.')

    # Update the status and alert the user
    self.set_setting(section, 'active', status)
    self.save()

    # Return the message for the commit
    message = "DEACTIVATE" if status == "false" else "ACTIVE"

    # Add the task name
    if is_task:
        message = "%s task %s" % (message, name)

    bot.info('[%s|%s] active: %s' % (section, self.name, status))
    return message
def main(args, extra):
    '''export temporal data for a watcher task, printed as json or written
       to the output file given with args.out.

       Parameters
       ==========
       args: the parsed arguments, read for watcher, task, filename, out,
             base, force and json
       extra: not used here
    '''
    # Required - will print help if not provided
    name = args.watcher[0]
    task = args.task[0]
    filename = args.filename[0]

    if not task.startswith('task'):
        # Show an export command, this is what the user was trying to run
        example = 'watchme export watcher task-reddit result.txt'
        bot.exit('Task name must start with "task", e.g., %s' % example)

    # Use the output file, or print to the terminal
    out = args.out

    # Get the watcher to interact with, must already exist
    watcher = get_watcher(name, base=args.base, create=False)

    if out is not None:
        if os.path.exists(out) and args.force is False:
            bot.exit('%s exists! Use --force to overwrite.' % out)

    # Export the data
    result = watcher.export_dict(task=task,
                                 filename=filename,
                                 name=name,
                                 export_json=args.json,
                                 base=args.base)

    if result is not None:
        if out is None:
            print(json.dumps(result, indent=4))
        else:
            write_json(result, out)
            bot.info('Result written to %s' % out)
def edit_task(self, name, action, key, value=None):
    '''change one parameter of a task and save the configuration. The
       action is one of "add" (the key must not exist yet), "update" (the
       key must exist) or "remove" (the key must exist). Problems are
       reported with bot.exit.

       Parameters
       ==========
       name: the name of the task to update
       action: the action to take (update, add, remove) a parameter
       key: the key to update
       value: the value to update, required for add and update
    '''
    if not self.has_task(name):
        bot.exit('%s is not a task defined by %s' % (name, self.name))

    if action not in ['update', 'add', 'remove']:
        bot.exit('Action must be update, add, or remove')

    if action in ['update', 'add'] and value is None:
        bot.exit('A value must be provided for the %s action' % action)

    # The section is only looked up inside the branch of a known action
    if action == "add":

        # Already exists, encourage user to update
        if key in self.config[name]:
            bot.exit('%s already exists. Use "update" action to change.' % key)
        else:
            bot.info('Adding %s:%s to %s' % (key, value, name))
            self.set_setting(name, key, value)

    elif action == 'update':

        # Only a key that is present can be updated
        if key in self.config[name]:
            bot.info('Updating %s to %s in %s' % (key, value, name))
            self.set_setting(name, key, value)
        else:
            bot.exit('%s is not found in config, cannot be updated.' % key)

    elif action == "remove":

        # Only a key that is present can be removed
        if key in self.config[name]:
            bot.info('Removing %s' % key)
            del self.config[name][key]
        else:
            bot.exit('%s is not found in config, cannot be removed.' % key)

    self.save()
def list_watcher_types():
    """print a header, followed by the entries of WATCHME_TASK_TYPES
    (the watcher task types), one per line."""
    listing = "\n ".join(WATCHME_TASK_TYPES)
    bot.custom(prefix="watchme:", message="watcher task types", color="CYAN")
    bot.info(listing)
def schedule(self, minute=12, hour=0, month='*', day='*', weekday='*', job=None, force=False):
    '''schedule the watcher to run at some frequency to update record of
       pages. By default, the task will run at 12 minutes passed midnight,
       daily. You can change the variables to change the frequency. See
       https://crontab.guru/ to get a setting that works for you.

            Hourly:  0 * * * *
            Daily:   0 0 * * *    (midnight) default
            weekly   0 0 * * 0
            monthly  0 0 1 * *
            yearly   0 0 1 1 *

       Parameters
       ==========
       minute: must be within 0 and 59, or set to "*" for every minute
       hour: must be within 0 through 23 or set to *
       month: must be within 1 and 12, or *
       day: must be between 1 and 31, or *
       weekday: must be between 0 and 6 or *
       job: if provided, assumes we are updating an existing entry.
       force: if True, replace a schedule that already exists

       Returns
       =======
       the cron job that was written
    '''
    # Cut out early if the job already exists, and force is false
    if self.has_schedule() and not force:
        bot.exit('%s already has a schedule. Use --force to update.' % self.name)

    # Validate everything BEFORE the crontab is touched, so a bad value
    # cannot leave the watcher without its previous schedule.

    # minute must be 0 through 59, or *
    if minute not in ['*'] + list(range(60)):
        bot.exit('minute must be in [0..59] or equal to *')

    # Hour must be 0 through 23, or *
    if hour not in ['*'] + list(range(24)):
        bot.exit('hour must be in [0..23] or equal to *')

    # Day must be in range 1 through 31, or *
    if day not in ['*'] + list(range(1, 32)):
        bot.exit('day must be in [1..31] or equal to *')

    # Month must be in range 1 through 12, or *
    if month not in ['*'] + list(range(1, 13)):
        bot.exit('month must be in [1..12] or equal to *')

    # Weekday must be in range 0 through 6, or *
    if weekday not in ['*'] + list(range(7)):
        bot.exit('weekday must be in [0..6] or equal to *')

    # Remove any previous schedules, this also hands back the crontab
    cron = self.remove_schedule(quiet=True)

    # The command will run the watcher, watcher.cfg controls what happens
    whereis = which('watchme')
    command = '%s run %s' % (whereis, self.name)
    comment = 'watchme-%s' % self.name

    if job is None:
        job = cron.new(command=command, comment=comment)

    # Set the time, and then write the job to file
    job.setall(minute, hour, day, month, weekday)
    job.enable()
    cron.write_to_user(user=True)
    bot.info(job)

    return job
def run(self, funcs, tasks):
    '''run will send a list of tasks, a tuple with arguments, through a
       function. The arguments should be ordered correctly.

       Parameters
       ==========
       funcs: the functions to run with multiprocessing.pool, a dictionary
              with lookup by the task name
       tasks: a dict of tasks, each task name (key) with a
              tuple of arguments to process

       Returns
       =======
       a dictionary with the result of every finished task, keyed by the
       task name, or None when there are no tasks. When an exception is
       caught, the results collected up to that point are returned.
    '''
    # Number of tasks must == number of functions
    assert (len(funcs) == len(tasks))

    # Keep track of some progress for the user
    progress = 1
    total = len(tasks)

    # if we don't have tasks, don't run
    if total == 0:
        return

    # results will also have the same key to look up
    finished = dict()
    results = []

    # Defined before the try, so the handlers below can tell whether a
    # pool was created at all
    pool = None

    try:
        prefix = "[%s/%s]" % (progress, total)
        if self.show_progress:
            bot.show_progress(0, total, length=35, prefix=prefix)
        pool = multiprocessing.Pool(self.workers, init_worker)

        self.start()
        for key, params in tasks.items():
            func = funcs[key]
            if not self.show_progress:
                bot.info('Processing task %s:%s' % (key, params))
            result = pool.apply_async(multi_wrapper,
                                      multi_package(func, [params]))

            # Store the key with the result
            results.append((key, result))

        while results:
            key, result = results.pop()
            result.wait()
            if self.show_progress:
                bot.show_progress(progress, total, length=35, prefix=prefix)
            progress += 1
            prefix = "[%s/%s]" % (progress, total)
            finished[key] = result.get()

        self.end()
        pool.close()
        pool.join()

    except (KeyboardInterrupt, SystemExit):
        bot.error("Keyboard interrupt detected, terminating workers!")
        if pool is not None:
            pool.terminate()
        sys.exit(1)

    except Exception as e:
        bot.error(e)

        # Don't leave worker processes behind when a task failed
        if pool is not None:
            pool.terminate()
            pool.join()

    return finished