Beispiel #1
0
    def _exporter_selected(self, exporter, regexp=None):
        '''check if an exporter is active and (if defined) passes user provided
           exporter names or regular expressions.

           Parameters
           ==========
           exporter: the exporter object to check
           regexp: an optional regular expression (or name) to check
        '''
        selected = True

        # A exporter can be None if it wasn't found
        if exporter == None:
            selected = False

        # Is the exporter not active (undefined is active)?
        active = exporter.params.get('active', 'true')
        if active == "false":
            bot.info('Exporter %s is not active.' % exporter)
            selected = False

        # The user wants to search for a custom task name
        if regexp != None:
            if not re.search(regexp, exporter):
                bot.info('Exporter %s is selected for data export.' % exporter)
                selected = False

        return selected
Beispiel #2
0
    def get_tasks(self, regexp=None, quiet=False, active=True):
        '''collect the tasks defined for a watcher, possibly matching a
           regular expression. Each section of the loaded config is looked
           up with get_task, and the result is kept when it is selected
           (see _task_selected) and reports itself as valid.

           Parameters
           ==========
           regexp: if supplied, only tasks that match this pattern are kept
           quiet: if True, don't print the number of tasks found
           active: handed to _task_selected (default True)

           Returns
           =======
           a list with the selected, valid tasks in config order
        '''
        self.load_config()

        found = []
        for name in self.config._sections:
            candidate = self.get_task(name)

            # Sections that don't resolve to a task are skipped
            if candidate is None:
                continue

            # Selection is checked first, validity only if selected
            if not self._task_selected(candidate, regexp, active):
                continue

            if candidate.valid:
                found.append(candidate)

        if not quiet:
            bot.info('Found %s contender tasks.' % len(found))
        return found
Beispiel #3
0
    def get_exporters(self, regexp=None, quiet=False):
        '''get the exporters for a watcher, possibly matching a regular
           expression. Each section of the loaded config is looked up with
           get_exporter, and the result is kept when it is selected
           (see _exporter_selected) and reports itself as valid.

           Parameters
           ==========
           regexp: if supplied, the user wants to export to destinations
                   that only match the expression specified.
           quiet: if True, don't print the number of exporters found
                  (default False, mirrors get_tasks)

           Returns
           =======
           a list with the selected, valid exporters in config order
        '''
        self.load_config()

        exporters = []

        for section in self.config._sections:

            # Get the exporter based on the section name
            exporter = self.get_exporter(section)

            # Sections that don't resolve to an exporter are skipped
            if exporter is None:
                continue

            # Check that the exporter should be used, and is valid
            if self._exporter_selected(exporter, regexp) and exporter.valid:
                exporters.append(exporter)

        if not quiet:
            bot.info('Found %s contender exporters.' % len(exporters))
        return exporters
Beispiel #4
0
def generate_watcher_config(path, watcher_type=None):
    '''generate a watcher config, meaning a watchme.cfg inside of the
       watcher folder. If the file does not exist yet it is copied from the
       config template, and then the watcher type is written to it.

       Parameters
       ==========
       path: the path to the watcher repository
       watcher_type: the type to record in the [watcher] section, defaults
                     to WATCHME_DEFAULT_TYPE
    '''
    check_exists(path)
    watcher_config = os.path.join(path, 'watchme.cfg')

    # Only seed from the template if the watcher has no config yet
    if not os.path.exists(watcher_config):
        configfile = get_configfile_template()
        bot.info('Generating watcher config %s' % watcher_config)
        shutil.copyfile(configfile, watcher_config)

    # Complete generation includes the watcher type
    if watcher_type is None:
        watcher_type = WATCHME_DEFAULT_TYPE

    # Read the watcher's own config (not the template) so that the content
    # of an already existing watchme.cfg survives the write below
    config = read_config(watcher_config)

    # The template config has the section, but just in case
    if 'watcher' not in config.sections():
        config.add_section('watcher')
    config['watcher']['type'] = watcher_type

    # Save to file
    write_config(watcher_config, config)
Beispiel #5
0
    def _task_selected(self, task, regexp=None, active=True):
        '''check if a task is active and (if defined) passes user provided
           task names or regular expressions.

           Parameters
           ==========
           task: the task object to check
           regexp: an optional regular expression (or name) to check
           active: a task is selected if it's active (default True)
        '''
        selected = True

        # A task can be None if it wasn't found
        if task is None:
            selected = False

        # Is the task not active (undefined is active)?
        is_active = task.params.get('active', 'true')
        if is_active == "false" and active:
            bot.info('Task %s is not active.' % task)
            selected = False

        # The user wants to search for a custom task name
        if regexp is not None and task is not None:
            if not re.search(regexp, task.name):
                bot.info('Task %s is not selected to run.' % task)
                selected = False

        return selected
Beispiel #6
0
def create_watcher(name=None, watcher_type=None, base=None, exporter=None):
    '''create a watcher, meaning a folder with a configuration and
       initialized git repo. Nothing is changed if the folder exists.

       Parameters
       ==========
       name: the watcher to create, uses default or WATCHME_WATCHER
       watcher_type: the type of watcher to create, handed on to
                     generate_watcher_config
       base: the folder to create the watcher in, defaults to
             WATCHME_BASE_DIR
       exporter: handed on to generate_watcher_config

       Returns
       =======
       the path of the new watcher repository, or None if it already exists
    '''
    if name is None:
        name = WATCHME_WATCHER

    if base is None:
        base = WATCHME_BASE_DIR

    # Create the repository folder
    repo = os.path.join(base, name)

    # An existing watcher is left untouched
    if os.path.exists(repo):
        bot.info('%s already exists: %s' % (name, repo))
        return None

    bot.info('Adding watcher %s...' % repo)
    mkdir_p(repo)

    # Ensure no gpg signing happens
    run_command("git --git-dir=%s/.git init" % repo)
    run_command("git --git-dir=%s/.git config commit.gpgsign false" % repo)

    # Add the watcher configuration file
    generate_watcher_config(repo, watcher_type, exporter)
    run_command("git -C %s add watchme.cfg" % repo)
    return repo
Beispiel #7
0
def git_clone(repo, name=None, base=None, force=False):
    """clone a git repo into the watchme base as a watcher. The repo is
    cloned to a temporary directory first, checked for a watchme.cfg, and
    only then moved to <base>/<name>.

    Parameters
    ==========
    repo: the repository to clone, handed to git clone as it is
    name: the name of the watcher to add. If not provided it is derived
          from the last component of repo, without a trailing ".git"
    base: the base of the watcher, defaults to WATCHME_BASE_DIR
    force: remove the destination first if it already exists
    """
    if base is None:
        base = WATCHME_BASE_DIR

    # Derive the repository name. Only a trailing ".git" is dropped, so that
    # a ".git" in the middle of the name is kept, and a trailing slash is
    # ignored so the name cannot come out empty for "host/org/repo/".
    if name is None:
        name = os.path.basename(repo.rstrip("/"))
        if name.endswith(".git"):
            name = name[:-4]

    # An empty name would make the destination the base folder itself,
    # which --force would then remove entirely
    if not name:
        bot.exit("Cannot derive a watcher name from %s, provide a name." %
                 repo)

    # First clone to temporary directory
    tmpdir = get_tmpdir()
    command = "git clone %s %s" % (repo, tmpdir)
    bot.debug(command)
    run_command(command)

    # ensure there is a watchme.cfg
    if not os.path.exists(os.path.join(tmpdir, "watchme.cfg")):
        shutil.rmtree(tmpdir)
        bot.exit("No watchme.cfg found in %s, aborting." % repo)

    # If it's good, move the repository
    dest = os.path.join(base, name)

    # Don't allow for overwrite
    if os.path.exists(dest):
        if force is False:
            shutil.rmtree(tmpdir)
            bot.exit("%s exists. Use --force to overwrite" % dest)
        else:
            shutil.rmtree(dest)

    # Move the repository there
    shutil.move(tmpdir, dest)

    # Ensure we don't sign gpg key
    run_command("git --git-dir=%s/.git config commit.gpgsign false" % dest)
    bot.info("Added watcher %s" % name)
Beispiel #8
0
def clear_schedule(self):
    '''clear all cron jobs associated with any watcher, meaning every job
       with a comment that starts with "watchme-". To remove jobs
       associated with a single watcher, use remove_schedule

       Returns
       =======
       the crontab object, after it has been written
    '''
    import re

    cron = self.get_crontab()
    bot.info('Clearing jobs associated with all watchers')

    # A plain string comment is compared for exact equality by
    # python-crontab, so the glob-like 'watchme-*' would only match a job
    # with exactly that comment. A compiled pattern matches the prefix.
    cron.remove_all(comment=re.compile('^watchme-'))

    # Save new cron
    cron.write_to_user(user=True)
    return cron
Beispiel #9
0
def _get_config(name, exporter):
    '''shared function to return the absolute path of a file in the config
       templates directory. The file is looked up in the exporter subfolder
       first (or in the templates folder itself if no exporter is given),
       with a fall back to the templates folder.

       Parameters
       ==========
       name: the file name to look for
       exporter: the exporter subfolder, may be None or empty
    '''
    templates = os.path.join(get_installdir(), 'config', 'templates')
    subfolder = exporter if exporter else ''
    candidate = os.path.join(templates, subfolder, name)

    if not os.path.exists(candidate):
        # Tell the user, and fall back to the general template
        bot.info(
            'The exporter specified does not exist : %s. The task was created with no exporters. '
            % exporter)
        candidate = os.path.join(templates, name)

    return os.path.abspath(candidate)
Beispiel #10
0
def create_watcher_base(name=None, base=None):
    """make sure that the watchme base folder exists, creating it if needed.

    Parameters
    ==========
    name: the watcher name, uses WATCHME_WATCHER if not provided. It is
          resolved here but not used any further by this function.
    base: the watcher base, defaults to WATCHME_BASE_DIR
    """
    base = WATCHME_BASE_DIR if base is None else base
    name = WATCHME_WATCHER if name is None else name

    # Nothing to do for an existing base
    if os.path.exists(base):
        return

    bot.info("Creating %s..." % base)
    mkdir_p(base)
Beispiel #11
0
def list_watcher(watcher, base=None):
    '''list the contents (tasks) of a single watcher.

       Parameters
       ==========
       watcher: the name of the watcher folder inside of the base
       base: the watchme base, defaults to WATCHME_BASE_DIR
    '''
    if base is None:
        base = WATCHME_BASE_DIR

    repo = os.path.join(base, watcher)
    if os.path.exists(repo):
        files = os.listdir(repo)
        bot.custom(prefix="task:", message="%s" % repo, color="CYAN")
        bot.info('\n  '.join(files))
    else:
        # Report the watcher folder that was looked for, not only the base
        bot.exit('%s does not exist.' % repo)
Beispiel #12
0
def print_section(self, section):
    '''show the settings of one section (usually a task) of the watcher
       configuration, or exit with a message if there is no such section.

       Parameters
       ==========
       section: the name of the section (task)
    '''
    self.load_config()

    # Unknown sections end here
    if section not in self.config:
        bot.exit('%s is not a valid section.' % section)
        return

    bot.info('[%s]' % section)
    settings = self.config[section]
    for key in settings:
        bot.custom(prefix=key, message=" = %s" % settings[key], color="CYAN")
Beispiel #13
0
def print_add_task(self, task):
    '''print the "watchme add" command that would re-create a task, built
       from the key and value pairs of its config section.

       Parameters
       ==========
       task: the name of the task to inspect
    '''
    self.load_config()

    # Unknown tasks end here
    if task not in self.config:
        bot.exit('%s is not a valid task.' % task)
        return

    params = self.config[task]

    # The command is the task name followed by one key@value per setting
    pieces = ["watchme add %s" % task]
    pieces.extend("%s@%s" % (key, params[key]) for key in params)
    bot.info(" ".join(pieces))
Beispiel #14
0
def get_watchers(base=None, quiet=False):
    '''list the watchers installed at a base. If base is not defined,
       the default base is used.

       Parameters
       ==========
       base: the watchme base, defaults to WATCHME_BASE_DIR
       quiet: if True, return the watchers without printing them

       Returns
       =======
       a list with the names found in the base folder
    '''
    if base is None:
        base = WATCHME_BASE_DIR

    if os.path.exists(base):
        watchers = os.listdir(base)
        if not quiet:
            bot.info('\n'.join(watchers))
        return watchers
    else:
        bot.exit('%s does not exist.' % base)
Beispiel #15
0
def get_watchers(base=None, quiet=False):
    """return (and unless quiet, print) the names found in a watchme base.

    Parameters
    ==========
    base: the watchme base, defaults to WATCHME_BASE_DIR
    quiet: if set, the names are returned without being printed
    """
    folder = WATCHME_BASE_DIR if base is None else base

    # A missing base is reported through bot.exit
    if not os.path.exists(folder):
        bot.exit("%s does not exist." % folder)
        return None

    names = os.listdir(folder)
    if not quiet:
        bot.info("\n".join(names))
    return names
Beispiel #16
0
def _general_list(path, prefix="path", base=None):
    """a shared function for listing (printing) the files in a folder.

    Parameters
    ==========
    path: the full path to list, if it exists
    prefix: a prefix to print for the type
    base: not used for the listing, kept so existing callers that pass it
          keep working
    """
    if os.path.exists(path):
        files = os.listdir(path)
        bot.custom(prefix="%s:" % prefix, message="%s" % path, color="CYAN")
        bot.info("\n  ".join(files))
    else:
        # The path is what was checked, so the path is what gets reported
        bot.exit("%s does not exist." % path)
Beispiel #17
0
    def delete(self):
        '''remove the folder that holds the watcher config file, unless the
           watcher is frozen or protected. Cannot be undone.
        '''
        self.load_config()

        # Frozen is checked before protected, only one of them is reported
        if self.is_frozen():
            reason = 'watcher %s is frozen, unfreeze to delete.'
            bot.exit(reason % self.name)
        elif self.is_protected():
            reason = 'watcher %s is protected, turn off protection to delete.'
            bot.exit(reason % self.name)

        folder = os.path.dirname(self.configfile)

        # There has to be a folder to delete
        if not os.path.exists(folder):
            bot.exit("%s:%s doesn't exist" % (self.name, folder))
            return

        bot.info('Removing watcher %s' % self.name)
        shutil.rmtree(folder)
Beispiel #18
0
def remove_schedule(self, name=None, quiet=False):
    '''remove a scheduled item from crontab, this is based on the watcher
       name. By default, we use the watcher instance name, however you
       can specify a custom name if desired.

       Parameters
       ==========
       name: the watcher name used in the job comment ("watchme-<name>"),
             defaults to the name of this watcher
       quiet: if True, don't print a message when a schedule is removed

       Returns
       =======
       the crontab object (written only if a job was removed)
    '''
    if name is None:
        name = self.name

    cron = self.get_crontab()

    # Build the comment from the requested name, so a custom name is honored
    comment = 'watchme-%s' % name

    # Collect first, so jobs are not removed while they are being searched
    jobs = list(cron.find_comment(comment))
    for job in jobs:
        cron.remove(job)

    if jobs:
        if not quiet:
            bot.info('Removed schedule for watcher %s' % name)
        cron.write_to_user(user=True)
    return cron
Beispiel #19
0
def get_commits(repo,
                from_commit=None,
                to_commit=None,
                grep=None,
                filename=None):
    """get commits, starting from and going to a particular commit. if grep
    is defined, filter commits to those with messages that match that
    particular expression

    Parameters
    ==========
    repo: the repository to inspect.
          NOTE(review): repo is not part of the git command that is built
          here - confirm how run_command selects the repository.
    from_commit: the commit to start at, defaults to get_earliest_commit()
    to_commit: the commit to go to, defaults to get_latest_commit()
    grep: the expression to match (not used if None)
    filename: the filename to filter to. Includes all files if not specified.

    Returns
    =======
    a list of the non empty lines of the command output (commit hashes)
    """
    command = 'git log --all --oneline --pretty=tformat:"%H"'

    # The earliest commit
    if from_commit is None:
        from_commit = get_earliest_commit()

    # The latest commit
    if to_commit is None:
        to_commit = get_latest_commit()

    # A regular expression to search for (and filter commits). Use the
    # expression that was asked for, not a fixed one.
    if grep is not None:
        command = '%s --grep "%s"' % (command, grep)

    # Add the commit range
    command = "%s %s..%s" % (command, from_commit, to_commit)

    if filename is not None:
        command = "%s -- %s" % (command, filename)

    bot.info(command)
    results = run_command(command)["message"]
    results = [x for x in results.split("\n") if x]
    return results
Beispiel #20
0
    def run_tasks(self, queue, parallel=True, show_progress=True):
        '''run a list of task objects and collect what they return. With
           parallel set to True the queue is handed to _run_parallel and
           its result is returned. Otherwise the tasks are run one after
           another by calling task.run() on each of them.

           Parameters
           ==========
           queue: the list of task objects, each with a name and run()
           parallel: hand the queue to _run_parallel (default True)
           show_progress: in serial mode, update a progress bar instead of
                          printing one line per task

           Returns
           =======
           in serial mode a dictionary of results with the task name as
           key, otherwise whatever _run_parallel returns
        '''
        if parallel is True:
            return self._run_parallel(queue, show_progress)

        # Serial mode: the counter in the prefix starts at 1
        total = len(queue)
        collected = {}

        for position, task in enumerate(queue, start=1):
            prefix = "[%s:%s/%s]" % (task.name, position, total)
            if show_progress is True:
                bot.show_progress(position, total, length=35, prefix=prefix)
            else:
                bot.info('Running %s' % prefix)
            collected[task.name] = task.run()

        return collected
Beispiel #21
0
    def remove_task(self, task):
        '''drop a task section from the watcher, along with the task folder
           in the watcher repo if there is one, and commit the removal.
           A frozen watcher is reported through bot.exit.

           Parameters
           ==========
           task: the name of the task to remove
        '''
        # Without a section there is nothing to remove
        if self.get_section(task) is None:
            bot.warning('Task %s does not exist.' % task)
            return

        if self.is_frozen():
            bot.exit('watcher is frozen, unfreeze first.')
        self.remove_section(task)

        # The task may have a folder with the same name in the repo
        task_folder = os.path.join(self.repo, task)
        if os.path.exists(task_folder):
            shutil.rmtree(task_folder)

        bot.info('%s removed successfully.' % task)
        git_commit(self.repo, self.name, "REMOVE task %s" % task)
Beispiel #22
0
def main(args, extra):
    """export temporal data for a task of a watcher, printed as json or
    written to args.out if that is set.

    Parameters
    ==========
    args: the parsed arguments (watcher, task, filename, out, base, force
          and json are read here)
    extra: not used by this function
    """
    # Required - will print help if not provided
    name, task, filename = args.watcher[0], args.task[0], args.filename[0]

    if not task.startswith(("task", "decorator")):
        example = "watchme export watcher task-reddit result.txt"
        bot.exit('Task name must start with "task" or "decorator": %s' %
                 example)

    # An output file is optional, without one the result is printed
    out = args.out

    # Get the watcher to interact with, must already exist
    watcher = get_watcher(name, base=args.base, create=False)

    # Refuse to replace an existing output file unless forced
    if out is not None and os.path.exists(out) and args.force is False:
        bot.exit("%s exists! Use --force to overwrite." % out)

    result = watcher.export_dict(task=task,
                                 filename=filename,
                                 name=name,
                                 export_json=args.json,
                                 base=args.base)

    # Nothing to show or write
    if result is None:
        return

    if out is None:
        print(json.dumps(result, indent=4))
        return

    write_json(result, out)
    bot.info("Result written to %s" % out)
Beispiel #23
0
    def _active_status(self, status='true', name=None):
        '''a general function to change the status, used by activate and
           deactivate.

           Parameters
           ==========
           status: must be one of true, false
           name: if not None, we are changing a task (not the watcher)

           Returns
           =======
           the message for the commit, "ACTIVE" or "DEACTIVATE", followed
           by "task <name>" only if a task name was provided
        '''
        # Load the configuration, if not loaded
        self.load_config()

        # Remember if a task was asked for before the section is defaulted,
        # otherwise the watcher itself would be reported as a task below
        is_task = name is not None
        if name is None:
            name = 'watcher'

        # Cut out early if section not in config
        if name not in self.config._sections:
            bot.exit('%s is not a valid task or section' % name)

        if status not in ['true', 'false']:
            bot.exit('status must be true or false.')

        # Update the status and alert the user
        self.set_setting(name, 'active', status)
        self.save()

        # Return the message for the commit
        message = "ACTIVE"
        if status == "false":
            message = "DEACTIVATE"

        # Add the task name
        if is_task:
            message = "%s task %s" % (message, name)

        bot.info('[%s|%s] active: %s' % (name, self.name, status))
        return message
Beispiel #24
0
def main(args, extra):
    '''export temporal data for a task of a watcher, printed as json or
       written to args.out if that is set.

       Parameters
       ==========
       args: the parsed arguments (watcher, task, filename, out, base,
             force and json are read here)
       extra: not used by this function
    '''
    # Required - will print help if not provided
    name = args.watcher[0]
    task = args.task[0]
    filename = args.filename[0]

    # The example shows the export command, since that is what failed
    if not task.startswith('task'):
        example = 'watchme export watcher task-reddit result.txt'
        bot.exit('Task name must start with "task", e.g., %s' % example)

    # An output file is optional, without one the result is printed
    out = args.out

    # Get the watcher to interact with, must already exist
    watcher = get_watcher(name, base=args.base, create=False)

    # Refuse to replace an existing output file unless forced
    if out is not None:
        if os.path.exists(out) and args.force is False:
            bot.exit('%s exists! Use --force to overwrite.' % out)

    # Export the data
    result = watcher.export_dict(task=task,
                                 filename=filename,
                                 name=name,
                                 export_json=args.json,
                                 base=args.base)

    if result is not None:

        if out is None:
            print(json.dumps(result, indent=4))
        else:
            write_json(result, out)
            bot.info('Result written to %s' % out)
Beispiel #25
0
    def edit_task(self, name, action, key, value=None):
        '''change one parameter of a task and save the configuration. The
           action is one of "add" (the key must not exist yet), "update"
           (the key must exist) or "remove" (the key must exist). A value
           is required for add and update.

           Parameters
           ==========
           name: the name of the task to update
           action: the action to take (update, add, remove) a parameter
           key: the key to update
           value: the value to update
        '''
        if not self.has_task(name):
            bot.exit('%s is not a task defined by %s' % (name, self.name))

        if action not in ['update', 'add', 'remove']:
            bot.exit('Action must be update, add, or remove')

        if action in ['update', 'add'] and value is None:
            bot.exit('A value must be provided for the %s action' % action)

        if action == "add":
            # Adding is only okay for a key that doesn't exist yet
            if key in self.config[name]:
                bot.exit('%s already exists. Use "update" action to change.'
                         % key)
            else:
                bot.info('Adding %s:%s to %s' % (key, value, name))
                self.set_setting(name, key, value)

        elif action == 'update':
            # Updating needs an existing key
            if key in self.config[name]:
                bot.info('Updating %s to %s in %s' % (key, value, name))
                self.set_setting(name, key, value)
            else:
                bot.exit('%s is not found in config, cannot be updated.'
                         % key)

        elif action == "remove":
            # Removing needs an existing key as well
            if key in self.config[name]:
                bot.info('Removing %s' % key)
                del self.config[name][key]
            else:
                bot.exit('%s is not found in config, cannot be removed.'
                         % key)

        self.save()
Beispiel #26
0
def list_watcher_types():
    """print a header, followed by the entries of WATCHME_TASK_TYPES"""
    bot.custom(prefix="watchme:", message="watcher task types", color="CYAN")

    # One entry per line, indented under the header
    listing = "\n  ".join(WATCHME_TASK_TYPES)
    bot.info(listing)
Beispiel #27
0
def schedule(self,
             minute=12,
             hour=0,
             month='*',
             day='*',
             weekday='*',
             job=None,
             force=False):
    '''schedule the watcher to run at some frequency to update record of pages.
       By default, the task will run at 12 minutes past midnight, daily.
       You can change the variables to change the frequency. See
       https://crontab.guru/ to get a setting that works for you.

            Hourly:   0 * * * *
            Daily:    0 0 * * *    (midnight)
            weekly    0 0 * * 0
            monthly   0 0 1 * *
            yearly    0 0 1 1 *

       Parameters
       ==========
       minute: must be within 0 and 59, or set to "*" for every minute
       hour: must be within 0 through 23 or set to *
       month: must be within 1 and 12, or *
       day: must be between 1 and 31, or *
       weekday: must be between 0 and 6 or *
       job: if provided, assumes we are updating an existing entry.
       force: replace a schedule that already exists for the watcher

       Returns
       =======
       the cron job that was written
    '''
    cron = self.get_crontab()

    # Cut out early if the job already exists, and force is false
    if self.has_schedule() and not force:
        bot.exit('%s already has a schedule. Use --force to update.' %
                 self.name)

    # Validate every field before the crontab is touched, so that a bad
    # value cannot cost the watcher the schedule it already has
    checks = [
        (minute, range(60), 'minute must be in [0..59] or equal to *'),
        (hour, range(24), 'hour must be in [0..23] or equal to *'),
        (day, range(1, 32), 'day must be in [1..31] or equal to *'),
        (month, range(1, 13), 'month must be in [1..12] or equal to *'),
        (weekday, range(7), 'weekday must be in [0..6] or equal to *'),
    ]
    for value, allowed, message in checks:
        if value not in ['*'] + list(allowed):
            bot.exit(message)

    # Remove any previous schedules
    cron = self.remove_schedule(quiet=True)

    # The command will run the watcher, watcher.cfg controls what happens
    whereis = which('watchme')
    command = '%s run %s' % (whereis, self.name)
    comment = 'watchme-%s' % self.name

    if job is None:
        job = cron.new(command=command, comment=comment)

    # Set the time, and then write the job to file
    job.setall(minute, hour, day, month, weekday)
    job.enable()
    cron.write_to_user(user=True)
    bot.info(job)

    return job
Beispiel #28
0
    def run(self, funcs, tasks):
        '''run will send each task, a tuple with arguments, through its
           function using a multiprocessing pool. The arguments should be
           ordered correctly.

           Parameters
           ==========
           funcs: the functions to run with multiprocessing.pool, a dictionary
                  with lookup by the task name
           tasks: a dict of tasks, each task name (key) with a
                  tuple of arguments to process

           Returns
           =======
           None if there are no tasks, otherwise a dictionary of results
           with the task name as key. After an error that is not an
           interrupt, only the results collected so far are included.
        '''
        # Number of tasks must == number of functions
        assert (len(funcs) == len(tasks))

        # Keep track of some progress for the user
        progress = 1
        total = len(tasks)

        # if we don't have tasks, don't run
        if len(tasks) == 0:
            return

        # results will also have the same key to look up
        finished = dict()
        results = []

        # Defined before the try, so the handlers can tell whether the pool
        # was created before something went wrong
        pool = None

        try:
            prefix = "[%s/%s]" % (progress, total)
            if self.show_progress:
                bot.show_progress(0, total, length=35, prefix=prefix)
            pool = multiprocessing.Pool(self.workers, init_worker)

            self.start()
            for key, params in tasks.items():
                func = funcs[key]
                if not self.show_progress:
                    bot.info('Processing task %s:%s' % (key, params))
                result = pool.apply_async(multi_wrapper,
                                          multi_package(func, [params]))

                # Store the key with the result
                results.append((key, result))

            # Results are collected from the end of the list
            while len(results) > 0:
                pair = results.pop()
                key, result = pair
                result.wait()
                if self.show_progress:
                    bot.show_progress(progress,
                                      total,
                                      length=35,
                                      prefix=prefix)
                progress += 1
                prefix = "[%s/%s]" % (progress, total)
                finished[key] = result.get()

            self.end()
            pool.close()
            pool.join()

        except (KeyboardInterrupt, SystemExit):
            bot.error("Keyboard interrupt detected, terminating workers!")
            if pool is not None:
                pool.terminate()
            sys.exit(1)

        except Exception as e:
            bot.error(e)

            # Don't leave worker processes behind after a failure
            if pool is not None:
                pool.terminate()
                pool.join()

        return finished