Esempio n. 1
0
    def get_task(self, name):
        """Load one task from the watcher config and instantiate it.

           The Task class is picked from the "type" entry of the task's
           config section and imported on demand.

           Parameters
           ==========
           name: the name of the task (config section) to load

           Returns
           =======
           the instantiated Task, or None when name is not a section of the
           config or does not start with "task".
        """
        self.load_config()

        # Anything that is not a known "task*" section is not a task
        if name not in self.config._sections or not name.startswith("task"):
            return None

        # The section holds the key/value entries of the task
        settings = self.config._sections[name]

        # A missing type becomes "" and ends in the exit branch below
        kind = settings.get("type", "")

        if kind.startswith("url"):
            from .urls import Task
        elif kind == "psutils":
            from .psutils import Task
        elif kind == "results":
            from .results import Task
        elif kind == "gpu":
            from .gpu import Task
        else:
            bot.exit("Type %s not properly set up in get_task" % kind)

        return Task(name, settings)
Esempio n. 2
0
def main(args, extra):
    """remove one or more tasks from a watcher, or delete the entire watcher

    Parameters
    ==========
    args: parsed arguments; args.watcher, args.base and args.delete are read
    extra: names of the tasks to remove (not used when args.delete is set)
    """
    # Required - will print help if not provided
    name = args.watcher[0]
    watcher = get_watcher(name, base=args.base, create=False)

    # If delete is true, remove the entire watcher and stop here
    if args.delete:
        watcher.delete()
        return

    # None and an empty list both mean that no tasks were provided
    if not extra:
        bot.exit("Provide tasks to remove, or --delete for entire watcher.")

    # Remove each task that was named
    for task in extra:
        watcher.remove_task(task)
Esempio n. 3
0
    def _add_task(self, task, force=False, active='true'):
        '''write a task to the watcher config and commit the change:

           1. An existing section with the same name is only replaced when
              force is set, otherwise we exit.
           2. The exported task parameters are stored as the new section and
              the config is saved.
           3. The task folder is created and added to the repo if missing,
              then the change is committed.

           Parameters
           ==========
           task: the Task object to add; task.name and task.export_params
                 are used here
           force: if task already exists, overwrite
           active: add the task as active (default "true")
        '''
        self.load_config()

        if active not in ("true", "false"):
            bot.exit('Active must be "true" or "false"')

        # An existing section is only replaced when force is given
        already_there = task.name in self.config.sections()
        if already_there and not force:
            bot.exit('%s exists, use --force to overwrite.' % task.name)
        if already_there:
            self.remove_section(task.name, save=False)

        # Store the new section, show it, and write the config
        self.config[task.name] = task.export_params(active=active)
        self.print_section(task.name)
        self.save()

        # The task gets its own folder inside the watcher repo
        folder = os.path.join(self.repo, task.name)
        if not os.path.exists(folder):
            mkdir_p(folder)
            git_add(self.repo, task.name)

        # Commit changes
        commit_message = "ADD task %s" % task.name
        git_commit(repo=self.repo, task=self.name, message=commit_message)
Esempio n. 4
0
    def get_decorator(self, name):
        '''create the Task object that backs a decorator. The class is
           chosen from the prefix of the name alone; the config is not
           consulted here.

           Parameters
           ==========
           name: the name of the decorator task to load

           Returns
           =======
           a Task created from the name
        '''
        # psutils is the only decorator type known here
        is_psutils = name.startswith('decorator-psutils')

        if not is_psutils:
            bot.exit('Type %s is not recognized in get_decorator' % name)
        else:
            from .psutils import Task

        return Task(name)
Esempio n. 5
0
    def _get_params_dict(self, pairs):
        '''convert a list of key@value strings into a dictionary with
           lowercase keys. Only the first "@" separates key from value.

           Parameters
           ==========
           pairs: a list of key@value pairs to set.

           Returns
           =======
           a dict of the parsed parameters. A key that is found in
           WATCHME_NOTALLOWED_PARAMS is reported and sets self.valid to
           False, but is still included in the result.
        '''
        parsed = {}

        for entry in pairs:

            # Each entry needs the separator to be split
            if "@" not in entry:
                bot.exit('incorrectly formatted param, must be key@value')

            pieces = entry.split('@', 1)
            name, setting = pieces
            name = name.lower()

            # Default params cannot be set by a task
            forbidden = name in WATCHME_NOTALLOWED_PARAMS
            if forbidden:
                bot.error('%s is a default, not allowed setting by task.' % name)
                self.valid = False

            parsed[name] = setting

        return parsed
Esempio n. 6
0
    def add_task(self, task, task_type, params, force=False, active="true"):
        """add a task, after checking that the type is known and that the
           parameters are valid for that type of task.

           Parameters
           ==========
           task: the name of the task to add, must start with "task"
           task_type: must be in WATCHME_TASK_TYPES, meaning a client exists
           params: list of parameters to be validated (key@value)
           force: if task already exists, overwrite
           active: add the task as active (default "true")
        """

        # Check again, in case user calling from client
        if not task.startswith("task"):
            bot.exit('Task name must start with "task" (e.g., task-reddit)')

        # Ensure it's a valid type. Both placeholders need a value, so the
        # arguments are passed as a tuple (offending type, allowed types)
        if task_type not in WATCHME_TASK_TYPES:
            bot.exit(
                "%s is not a valid type: %s" % (task_type, WATCHME_TASK_TYPES)
            )

        # Import the Task class that matches the type
        if task_type.startswith("url"):
            from .urls import Task

        elif task_type == "psutils":
            from .psutils import Task

        elif task_type == "results":
            from .results import Task

        elif task_type == "gpu":
            from .gpu import Task

        else:
            bot.exit("task_type %s not properly added to Watcher" % task_type)

        # Convert list to dictionary
        params = self._get_params_dict(params)

        # The new task exposes a "valid" attribute after creation
        newtask = Task(task, params=params)

        # Exit if the new task is not valid
        if not newtask.valid:
            bot.exit("%s is not valid, will not be added." % task)

        # Write the task to the config file and commit it
        self._add_task(newtask, force, active)
Esempio n. 7
0
    def run(self, regexp=None, parallel=True, test=False, show_progress=True):
        '''run the tasks of the watcher:

             - check: exit when the watcher is not active (skipped in
                      test mode)
             - parse: get the tasks to be run
             - start: run the tasks
             - finish: hand the results to finish_runs, or in test mode
                       print them as json instead

           Parameters
           ==========
           regexp: accepted for the caller, but not used by this function
           parallel: passed on to run_tasks (defaults to True)
           test: run in test mode (results are printed, not finished)
           show_progress: passed on to run_tasks (defaults to True)
        '''
        # Each run session generates a name (not used further here)
        run_id = RobotNamer().generate()

        # An inactive watcher only runs in test mode
        if self.is_active() == False and test is False:
            bot.exit('Watcher %s is not active.' % self.name)

        # Collect the tasks and run them
        pending = self.get_tasks()
        outcome = self.run_tasks(pending, parallel, show_progress)

        # Test mode prints the results, a real run finishes them
        if test is not False:
            print(json.dumps(outcome, indent=4))
        else:
            self.finish_runs(outcome)
Esempio n. 8
0
def main(args, extra):
    """list watcher types, installed watchers, or the content of a watcher

    Parameters
    ==========
    args: parsed arguments; args.watchers and args.base are read
    extra: optional [<watcher>] or [<watcher>, <task>] to list
    """
    # The watcher types are listed on request, nothing else is done then
    if args.watchers is True:
        list_watcher_types()
        return

    # No watcher provided (None or an empty list): list the watchers
    if not extra:
        get_watchers(args.base)

    # One argument is the name of a watcher
    elif len(extra) == 1:
        list_watcher(extra[0], args.base)

    # Two arguments must be a watcher and task
    elif len(extra) == 2:
        list_task(extra[0], extra[1], args.base)

    else:
        bot.exit("Please provide none or all of <watcher> <task> to list.")
Esempio n. 9
0
    def _set_base(self, base=None, create=False):
        ''' store the base, repo and config file paths of the watcher, and
            make sure that the watcher exists.

            Parameters
            ==========
            base: the base folder of watcher repos, WATCHME_BASE_DIR if None
            create: create the watcher if it doesn't exist (default is False)
        '''
        # Fall back to the default base when none is given
        self.base = WATCHME_BASE_DIR if base is None else base
        self.repo = os.path.join(self.base, self.name)
        self.configfile = os.path.join(self.repo, 'watchme.cfg')

        # The watcher exists when both the repo and its config are present
        found = os.path.exists(self.repo) and os.path.exists(self.configfile)
        if found:
            return

        # NOTE(review): create_watcher is only given the name, not the
        # base - confirm that a custom base is honored on creation
        if create is True:
            create_watcher(self.name)
        else:
            bot.exit('Watcher %s does not exist. Use watchme create.' % self.name)
Esempio n. 10
0
    def remove_task(self, task):
        '''remove the config section and the folder of a task, then commit
           the removal. A task without a section only produces a warning,
           and a frozen watcher exits before anything is removed.

           Parameters
           ==========
           task: the name of the task to remove
        '''
        # Nothing to do for a task without a section
        if self.get_section(task) is None:
            bot.warning('Task %s does not exist.' % task)
            return

        if self.is_frozen():
            bot.exit('watcher is frozen, unfreeze first.')

        self.remove_section(task)

        # The folder of the task (if any) is removed with all content
        task_folder = os.path.join(self.repo, task)
        if os.path.exists(task_folder):
            shutil.rmtree(task_folder)

        bot.info('%s removed successfully.' % task)
        commit_message = "REMOVE task %s" % task
        git_commit(self.repo, self.name, commit_message)
Esempio n. 11
0
    def get_decorator(self, name):
        """create the Task object that backs a decorator. The class is
           chosen from the prefix of the name alone; the config is not
           consulted here.

           Parameters
           ==========
           name: the name of the decorator task to load

           Returns
           =======
           a Task created from the name
        """
        # psutils and gpu are the decorator types known here
        for_psutils = name.startswith("decorator-psutils")

        if for_psutils:
            from .psutils import Task
        elif name.startswith("decorator-gpu"):
            from .gpu import Task
        else:
            message = "Type %s is not recognized in watchers.get_decorator" % name
            bot.exit(message)

        return Task(name)
Esempio n. 12
0
def main(args, extra):
    """edit the configuration for a watcher task

    Parameters
    ==========
    args: parsed arguments; args.watcher, args.action, args.task and
          args.base are read
    extra: [<key>] for a removal, or [<key>, <value>] for add and update
    """
    # Required - will print help if not provided
    name = args.watcher[0]
    action = args.action[0]
    task = args.task[0]

    # Get the watcher (exits if doesn't exist)
    watcher = get_watcher(name, base=args.base)

    # Exit if no key was provided. An empty list is checked along with
    # None, so that extra[0] below cannot raise an IndexError
    if not extra:
        bot.exit("Please provide one or more items to %s" % action)

    key = extra[0]
    value = None

    # Adding and updating need exactly a key and a value
    if action in ["add", "update"]:
        if len(extra) != 2:
            bot.exit("You must do watchme <watcher> add <key> <value>")
        value = extra[1]

    # The watcher validates the task and action, and applies the edit
    watcher.edit_task(task, action, key, value)
Esempio n. 13
0
    def get_task(self, name, save=False):
        '''Load one task from the watcher config and instantiate it.

           The Task class is picked from the "type" entry of the task's
           config section and imported on demand.

           Parameters
           ==========
           name: the name of the task (config section) to load
           save: handed to the Task as _save

           Returns
           =======
           the instantiated Task, or None when name is not a section of the
           config or does not start with "task".
        '''
        self.load_config()

        # Anything that is not a known "task*" section is not a task
        if name not in self.config._sections or not name.startswith('task'):
            return None

        # The section holds the key/value entries of the task
        settings = self.config._sections[name]

        # A missing type becomes "" and ends in the exit branch below
        kind = settings.get('type', '')

        if kind.startswith("url"):
            from .urls import Task
        elif kind == 'psutils':
            from .psutils import Task
        else:
            bot.exit('Type %s not properly set up in get_task' % kind)

        return Task(name, settings, _save=save)
Esempio n. 14
0
    def _active_status(self, status='true', name=None):
        '''a general function to change the status, used by activate and
           deactivate.

           Parameters
           ==========
           status: must be one of true, false
           name: if not None, we are changing a task (not the watcher)

           Returns
           =======
           the message for the commit. The task name is only appended when
           a task name was provided.
        '''
        # Load the configuration, if not loaded
        self.load_config()

        # Remember if a task was requested before the section is defaulted,
        # otherwise the watcher itself would be reported as a task
        is_task = name is not None
        section = name if is_task else 'watcher'

        # Cut out early if section not in config
        if section not in self.config._sections:
            bot.exit('%s is not a valid task or section' % section)

        if status not in ['true', 'false']:
            bot.exit('status must be true or false.')

        # Update the status and write the config
        self.set_setting(section, 'active', status)
        self.save()

        # Prepare the message for the commit
        message = "DEACTIVATE" if status == "false" else "ACTIVE"

        # Add the task name
        if is_task:
            message = "%s task %s" % (message, name)

        bot.info('[%s|%s] active: %s' % (section, self.name, status))
        return message
Esempio n. 15
0
def main(args, extra):
    """export the data recorded over time for one file of a watcher task

    The result is printed as json, or written to args.out when given.
    """
    # Required - will print help if not provided
    name = args.watcher[0]
    task = args.task[0]
    filename = args.filename[0]

    # Only tasks and decorators can be exported
    if not task.startswith(("task", "decorator")):
        example = "watchme export watcher task-reddit result.txt"
        message = 'Task name must start with "task" or "decorator": %s' % example
        bot.exit(message)

    # Optional file to write the result to
    out = args.out

    # Get the watcher to interact with, must already exist
    watcher = get_watcher(name, base=args.base, create=False)

    # An existing output file is only replaced with --force
    if out is not None and os.path.exists(out) and args.force is False:
        bot.exit("%s exists! Use --force to overwrite." % out)

    result = watcher.export_dict(
        task=task,
        filename=filename,
        name=name,
        export_json=args.json,
        base=args.base,
    )

    # Nothing to show or write without a result
    if result is None:
        return

    if out is not None:
        write_json(result, out)
        bot.info("Result written to %s" % out)
    else:
        print(json.dumps(result, indent=4))
Esempio n. 16
0
def main(args, extra):
    '''export the data recorded over time for one file of a watcher task

    The result is printed as json, or written to args.out when given.
    '''
    # Required - will print help if not provided
    name = args.watcher[0]
    task = args.task[0]
    filename = args.filename[0]

    # Tasks and decorators can both be exported by the watcher, so both
    # prefixes are accepted here as well
    if not task.startswith('task') and not task.startswith('decorator'):
        example = 'watchme export watcher task-reddit result.txt'
        bot.exit('Task name must start with "task" or "decorator": %s' %
                 example)

    # Optional file to write the result to
    out = args.out

    # Get the watcher to interact with, must already exist
    watcher = get_watcher(name, base=args.base, create=False)

    # An existing output file is only replaced with --force
    if out is not None:
        if os.path.exists(out) and args.force is False:
            bot.exit('%s exists! Use --force to overwrite.' % out)

    # Collect the data to export
    result = watcher.export_dict(task=task,
                                 filename=filename,
                                 name=name,
                                 export_json=args.json,
                                 base=args.base)

    if result is not None:

        # Print to the screen unless an output file was requested
        if out is None:
            print(json.dumps(result, indent=4))
        else:
            write_json(result, out)
            bot.info('Result written to %s' % out)
Esempio n. 17
0
def main(args, extra):
    """add a task to an existing watcher

    The extra arguments are key@value parameters for the task. A type@
    entry is not passed on as a parameter, it replaces the type taken from
    args.watcher_type instead.
    """
    # Required - will print help if not provided
    name = args.watcher[0]
    task = args.task[0]

    if not task.startswith("task"):
        example = "watchme add-task watcher task-cpu func@cpu_task type@psutils"
        bot.exit('Task name must start with "task", e.g., %s' % example)

    # Exit if the user doesn't provide any parameters
    if extra is None:
        bot.exit(
            "Please provide parameters to add to your watcher (key@value)")

    # The type is taken from the arguments, unless given as a parameter
    # (the last type@ entry wins)
    watcher_type = args.watcher_type
    for entry in extra:
        if entry.startswith("type@"):
            watcher_type = entry.replace("type@", "")

    # Everything else is a parameter for the task
    params = [entry for entry in extra if not entry.startswith("type@")]

    # Get the watcher to interact with, must already exist
    watcher = get_watcher(name, base=args.base, create=False)

    # Add the task to the watcher
    watcher.add_task(
        task=task,
        task_type=watcher_type,
        params=params,
        force=args.force,
        active=args.active,
    )
Esempio n. 18
0
    def delete(self):
        '''remove the folder that holds the watcher config file. A frozen
           or protected watcher exits before anything is removed.
        '''
        self.load_config()

        # Frozen is checked first, protection only if not frozen
        if self.is_frozen():
            bot.exit('watcher %s is frozen, unfreeze to delete.' % self.name)
        elif self.is_protected():
            bot.exit('watcher %s is protected, turn off protection to delete.' % self.name)

        # The watcher lives in the folder of its config file
        folder = os.path.dirname(self.configfile)

        if not os.path.exists(folder):
            bot.exit("%s:%s doesn't exist" % (self.name, folder))
        else:
            bot.info('Removing watcher %s' % self.name)
            shutil.rmtree(folder)
Esempio n. 19
0
def schedule(self,
             minute=12,
             hour=0,
             month='*',
             day='*',
             weekday='*',
             job=None,
             force=False):
    '''schedule the watcher to run at some frequency via a cron job.
       By default, the job runs at 12 minutes past midnight, daily
       (12 0 * * *). You can change the variables to change the frequency.
       See https://crontab.guru/ to get a setting that works for you.

            Hourly:	0 * * * *
            Daily:	0 0 * * *    (midnight)
            weekly	0 0 * * 0
            monthly	0 0 1 * *
            yearly	0 0 1 1 *

       Parameters
       ==========
       minute: must be within 0 and 59, or set to "*" for every minute
       hour: must be within 0 through 23 or set to *
       month: must be within 1 and 12, or *
       day: must be between 1 and 31, or *
       weekday: must be between 0 and 6 or *
       job: if provided, this job is updated instead of creating a new one
       force: replace a schedule that already exists

       Returns
       =======
       the job that was written to the crontab
    '''
    # NOTE(review): this value is replaced below by the return value of
    # remove_schedule; the call is kept in case it has side effects
    cron = self.get_crontab()

    # Cut out early if the job already exists, and force is false
    if self.has_schedule() and not force:
        bot.exit('%s already has a schedule. Use --force to update.' %
                 self.name)

    # Validate all values BEFORE the previous schedule is removed, so that
    # an invalid value cannot leave the watcher without a schedule
    checks = [
        (minute, range(60), 'minute must be in [0..59] or equal to *'),
        (hour, range(24), 'hour must be in [0..23] or equal to *'),
        (day, range(1, 32), 'day must be in [1..31] or equal to *'),
        (month, range(1, 13), 'month must be in [1..12] or equal to *'),
        (weekday, range(7), 'weekday must be in [0..6] or equal to *'),
    ]
    for value, allowed, message in checks:
        if value not in ['*'] + list(allowed):
            bot.exit(message)

    # Remove any previous schedules
    cron = self.remove_schedule(quiet=True)

    # The command runs the watcher by name, the comment identifies the job
    whereis = which('watchme')
    command = '%s run %s' % (whereis, self.name)
    comment = 'watchme-%s' % self.name

    if job is None:
        job = cron.new(command=command, comment=comment)

    # Set the time, and then write the job to file
    job.setall(minute, hour, day, month, weekday)
    job.enable()
    cron.write_to_user(user=True)
    bot.info(job)

    return job
Esempio n. 20
0
def export_dict(self,
                task,
                filename,
                name=None,
                export_json=False,
                from_commit=None,
                to_commit=None,
                base=None):
    '''Export a dictionary of the changes of a file over time, collected
       from the commits of the watcher repository.

       Parameters
       ==========
       task: the task folder for the watcher to look in
       filename: the file in the task folder to export
       name: the name of the watcher, defaults to the client's
       export_json: if True, the content of each commit is parsed as json
       from_commit: the commit to start at
       to_commit: the commit to go to
       base: the base of watchme to look for the task folder

       Returns
       =======
       a dict with the lists "commits", "dates" and "content"
    '''
    if name is None:
        name = self.name

    if base is None:
        base = self.base

    # Quit early if the task isn't there
    if not self.has_task(task) and not task.startswith('decorator'):
        bot.exit('%s is not a valid task or decorator for %s' % (task, name))

    # NOTE(review): the paths are built from self.name, the name argument
    # is only used in messages - confirm that this is intended
    repo = os.path.join(base, self.name)
    if not os.path.exists(repo):
        bot.exit('%s does not exist.' % repo)

    filepath = os.path.join(base, self.name, task, filename)

    # Ensure that the filename exists in the repository
    if not os.path.exists(filepath):
        bot.exit('%s does not exist for watcher %s' % (filepath, name))

    # Now filepath must be relative to the repo
    filepath = os.path.join(task, filename)

    # Only commits that added results for the task are considered
    commits = get_commits(repo=repo,
                          from_commit=from_commit,
                          to_commit=to_commit,
                          grep="ADD results %s" % task,
                          filename=filepath)

    # Keep lists of commits, dates, content
    result = {'commits': [], 'dates': [], 'content': []}

    for commit in commits:
        content = git_show(repo=repo, commit=commit, filename=filepath)

        if export_json is True:
            content = json.loads(content)

        # A list is merged into the content, anything else is appended
        if isinstance(content, list):
            result['content'] += content
        else:
            result['content'].append(content)

        result['dates'].append(git_date(repo=repo, commit=commit))
        result['commits'].append(commit)
    return result
Esempio n. 21
0
    def edit_task(self, name, action, key, value=None):
        '''change one parameter of a task and save the config. The action
           decides what happens with the key:

             - add: the key must not exist yet, and is set to the value
             - update: the key must exist, and is set to the value
             - remove: the key must exist, and is deleted

           Parameters
           ==========
           name: the name of the task to update
           action: the action to take (update, add, remove) a parameter
           key: the key to update
           value: the value to set (required for add and update)
        '''

        if not self.has_task(name):
            bot.exit('%s is not a task defined by %s' % (name, self.name))

        if action not in ['update', 'add', 'remove']:
            bot.exit('Action must be update, add, or remove')

        # Adding and updating cannot be done without a value
        needs_value = action in ['update', 'add']
        if needs_value and value is None:
            bot.exit('A value must be provided for the %s action' % action)

        if action == "add":

            # An existing key must be changed with update instead
            if key in self.config[name]:
                bot.exit('%s already exists. Use "update" action to change.' % key)
            else:
                bot.info('Adding %s:%s to %s' % (key, value, name))
                self.set_setting(name, key, value)

        elif action == 'update':

            # Only an existing key can be updated
            if key in self.config[name]:
                bot.info('Updating %s to %s in %s' % (key, value, name))
                self.set_setting(name, key, value)
            else:
                bot.exit('%s is not found in config, cannot be updated.' % key)

        elif action == "remove":

            # Only an existing key can be removed
            if key in self.config[name]:
                bot.info('Removing %s' % key)
                del self.config[name][key]
            else:
                bot.exit('%s is not found in config, cannot be removed.' % key)

        self.save()
Esempio n. 22
0
def check_exists(filename):
    """exit with a message when the given path does not exist.

    Parameters
    ==========
    filename: the path to look for
    """
    # Nothing to do for a path that is found
    if os.path.exists(filename):
        return

    bot.exit("Cannot find %s" % filename)
Esempio n. 23
0
def check_exists(filename):
    '''exit with a message when the given path does not exist.

    Parameters
    ==========
    filename: the path to look for
    '''
    found = os.path.exists(filename)

    # Only a missing path is reported
    if found:
        return

    bot.exit('Cannot find %s' % filename)
Esempio n. 24
0
 def run(self, *args, **kwargs):
     '''placeholder for running the function or process being monitored.
        This version only exits with a message that a subclass has to
        provide the implementation; the arguments are not used.
     '''
     message = 'run function must be implemented by subclass.'
     bot.exit(message)
Esempio n. 25
0
    def run(self, funcs, tasks):
        '''run each task through its function with a multiprocessing pool,
           and collect the results by task name.

           Parameters
           ==========
           funcs: the functions to run with multiprocessing.pool, a dictionary
                  with lookup by the task name
           tasks: a dict of tasks, each task name (key) with a
                  tuple of arguments to process

           Returns
           =======
           a dict of results with lookup by the task name, or None when
           there are no tasks to run.
        '''
        # Number of tasks must == number of functions
        assert len(funcs) == len(tasks)

        # if we don't have tasks, don't run
        if not tasks:
            return

        # Keep track of some progress for the user
        progress = 1
        total = len(tasks)

        # results will also have the same key to look up
        finished = dict()
        results = []

        # Defined before the try, so that the handlers below can tell if
        # the pool was created before they try to terminate it
        pool = None

        try:
            prefix = "[%s/%s]" % (progress, total)
            if self.show_progress:
                bot.show_progress(0, total, length=35, prefix=prefix)
            pool = multiprocessing.Pool(self.workers, init_worker)

            self.start()
            for key, params in tasks.items():
                func = funcs[key]
                if not self.show_progress:
                    bot.info('Processing task %s:%s' % (key, params))
                result = pool.apply_async(multi_wrapper,
                                          multi_package(func, [params]))

                # Store the key with the result
                results.append((key, result))

            # Wait for each result (last submitted first) and store it
            while results:
                key, result = results.pop()
                result.wait()
                if self.show_progress:
                    bot.show_progress(progress, total, length=35, prefix=prefix)
                progress += 1
                prefix = "[%s/%s]" % (progress, total)
                finished[key] = result.get()

            self.end()
            pool.close()
            pool.join()

        except (KeyboardInterrupt, SystemExit):
            bot.error("Keyboard interrupt detected, terminating workers!")
            if pool is not None:
                pool.terminate()
            sys.exit(1)

        except Exception as error:
            # Stop the workers and report the cause before exiting, so the
            # reason of the failure is not lost
            if pool is not None:
                pool.terminate()
            bot.error('%s' % error)
            bot.exit('Error running task.')

        return finished
Esempio n. 26
0
 def wait(self, *args, **kwargs):
     '''placeholder for monitoring the process after run. This version
        only exits with a message that a subclass has to provide the
        implementation; the arguments are not used.
     '''
     message = 'wait function must be implemented by subclass.'
     bot.exit(message)