Esempio n. 1
0
        def wrapper(*fargs, **fkwargs):
            """Run the decorated function through a ProcessRunner and save
            the runner's timepoints to a watcher.

            The decorator's own positional/keyword arguments (``args`` and
            ``kwargs`` from the enclosing scope) configure the watcher and
            the runner; ``fargs`` / ``fkwargs`` are forwarded to the wrapped
            function. Returns whatever ``runner.wait`` returns, or None when
            no watcher name was given.
            """
            # The watcher name is the first decorator argument; without it
            # there is nowhere to save results.
            if not args:
                bot.error(
                    "A watcher name is required for the psutils decorator.")
                return None

            # Get a watcher to save results to
            watcher = get_watcher(args[0], create=kwargs.get("create", False))

            # Runner options come from the decorator keyword arguments
            runner_options = {
                "seconds": kwargs.get("seconds", 3),
                "skip": kwargs.get("skip", []),
                "include": kwargs.get("include", []),
                "only": kwargs.get("only", []),
            }
            runner = ProcessRunner(**runner_options)

            # Start the function and collect its result
            runner.run(func, *fargs, **fkwargs)
            outcome = runner.wait("monitor_pid_task")

            # Save results (finishing runs) - the key is the folder created,
            # named after the "name" option or the function itself
            task_name = kwargs.get("name", func.__name__)
            watcher.finish_runs(
                {"decorator-psutils-%s" % task_name: runner.timepoints})

            # Return function result to the user
            return outcome
Esempio n. 2
0
    def export_runs(self, results, exporters):
        '''Export data retrieved to the set of exporters defined and active.
           Maybe an export flag could be set to choose to run + export?

           Parameters
           ==========
           results: a dictionary with task names as keys and the task
                    result as values.
           exporters: an iterable of exporter objects; each is expected to
                      provide _save_text_list(name, result) and
                      _save_text(result).
        '''
        for name, result in results.items():

            task = self.get_task(name, save=True)

            # Case 1. The result is a list
            if isinstance(result, list):

                # Get rid of Nones, if the user accidentally added
                result = [r for r in result if r]

                if not result:
                    bot.error('%s returned empty list of results.' % name)

                # for a json, or a list of paths, ignore for now.
                elif not (task.params.get('save_as') == 'json'
                          or os.path.exists(result[0])):
                    for exporter in exporters:
                        bot.debug('Exporting list to ' + exporter.name)
                        exporter._save_text_list(name, result)

            # Case 2. The result is a string
            elif isinstance(result, str):

                # if it's a path to a file, ignore it. Otherwise send the
                # text to every exporter (not just whichever one a previous
                # loop happened to leave bound).
                if not os.path.exists(result):
                    for exporter in exporters:
                        exporter._save_text(result)
Esempio n. 3
0
def clone_watcher(repo, base=None, name=None):
    """clone a watcher from Github (or other version control with git)
    meaning that we clone to a temporary folder, and then move
    to a new folder. By default, the user gets all tasks associated
    with the watcher, along with the git folder so that removing
    is also done with version control.

    The clone is only moved into place when every folder whose name starts
    with "task" contains a watcher.cfg; otherwise the temporary clone is
    removed.

    Parameters
    ==========
    repo: the repository to clone
    base: the watchme base, defaults to WATCHME_BASE_DIR
    name: a new name for the watcher, if a rename is desired.
    """
    if base is None:
        base = WATCHME_BASE_DIR

    # TODO: this function still needs testing.

    # Validate the repository address: it must *start* with git@ or http
    # (the alternation is grouped so the anchor applies to both).
    if not re.search("^(git@|http)", repo):
        bot.exit("Please provide a valid url to git repository")

    # if the name is None, use the repo name
    if name is None:
        name = os.path.basename(repo)

    # Ensure we aren't overwriting
    dest = os.path.join(base, name)
    if os.path.exists(dest):
        bot.exit("%s already exists, choose a different watcher name." % name)

    clone_dest = get_tmpdir(prefix="watchme-clone", create=False)

    # NOTE(review): repo and clone_dest are interpolated into a command
    # string; how run_command executes it is not visible here - confirm it
    # does not pass through a shell.
    run_command("git clone %s %s" % (repo, clone_dest))

    # Valid by default - will copy over if valid
    valid = True

    # Iterate over the top-level entries of the clone
    for entry in os.listdir(clone_dest):
        watcher = os.path.join(clone_dest, entry)

        # Regular files (e.g. a README) cannot be listed, skip them
        if not os.path.isdir(watcher):
            continue

        # Check that task folders include watcher.cfg
        for task in os.listdir(watcher):
            if not task.startswith("task"):
                continue
            task_folder = os.path.join(watcher, task)

            # Only folders can hold a watcher.cfg
            if not os.path.isdir(task_folder):
                continue
            if "watcher.cfg" not in os.listdir(task_folder):
                bot.error("%s is missing a watcher.cfg" % task)
                valid = False
                break

    if valid:
        shutil.move(clone_dest, dest)

    # Clean up the temporary clone if it was not moved
    if os.path.exists(clone_dest):
        shutil.rmtree(clone_dest)
Esempio n. 4
0
 def run(self):
     """Run the task in isolation (no update of, or communication with,
        the watcher) and return the raw result of the task function.

        The function is called with the exported parameters as keyword
        arguments. If no function is found an error is logged and None
        is returned.
     """
     kwargs = self.export_params()
     task_func = self.export_func()
     if task_func is None:
         bot.error("Cannot find function.")
         return None
     return task_func(**kwargs)
Esempio n. 5
0
 def _validate(self):
     """additional validation function, called by validate() of
     superclass. Here we assume all required self.params are included.
     If an parameter is found to be invalid, self.valid should be set
     to False
     """
     # The url must begin with http
     if not self.params["url"].startswith("http"):
         bot.error("%s is not a valid url." % self.params["url"])
         self.valid = False
Esempio n. 6
0
 def _validate(self):
     '''additional validation function, called by validate() of 
        superclass. Here we assume all required self.params are included.
        If an parameter is found to be invalid, self.valid should be set
        to False
     '''
     # The url must begin with http
     if not self.params['url'].startswith('http'):
         bot.error('%s is not a valid url.' % self.params['url'])
         self.valid = False
Esempio n. 7
0
def get_url_selection(url, **kwargs):
    """select some content from a page dynamically, using selenium.

       Calls get_results once per parameter set and concatenates the
       results. Returns None when no selection is given or when nothing
       was collected.

       Parameters
       ==========
       url: the url passed through to get_results
       kwargs: a dictionary of key, value pairs provided by the user.
               Keys read here: selection (required), get_text, regex,
               attributes (comma separated).
    """
    selector = kwargs.get("selection", None)
    headers = get_headers(kwargs)

    # The selection is required
    if selector is None:
        bot.error("You must define the selection (e.g., [email protected]")
        return None

    # Any non-None value for get_text turns text extraction on
    get_text = kwargs.get("get_text") is not None

    # Optional regular expression, passed through to get_results
    regex = kwargs.get("regex")

    # One or more attributes, given as a comma separated string
    attributes = kwargs.get("attributes", None)
    if attributes is not None:
        attributes = attributes.split(",")

    # User can pass a parameter like url_param_<name>
    # url_param_page=1,2,3,4,5,6,7,8,9
    # Each parameter set is a dictionary of values
    collected = []
    for params in get_params(kwargs):
        collected.extend(
            get_results(
                url=url,
                selector=selector,
                headers=headers,
                attributes=attributes,
                params=params,
                get_text=get_text,
                regex=regex,
            )
        )

    # An empty collection is reported as None
    return collected or None
Esempio n. 8
0
def get_url_selection(url, **kwargs):
    '''select some content from a page dynamically, using selenium.

       Calls get_results once per parameter set and concatenates the
       results. Returns None when no selection is given or when nothing
       was collected.

       Parameters
       ==========
       url: the url passed through to get_results
       kwargs: a dictionary of key, value pairs provided by the user.
               Keys read here: selection (required), get_text, regex,
               attributes (comma separated).
    '''

    results = None
    selector = kwargs.get('selection', None)
    headers = get_headers(kwargs)

    # Identity checks against None: equality could be hijacked by a custom
    # __eq__ on the value supplied by the user
    if selector is None:
        bot.error('You must define the selection (e.g., [email protected]')
        return results

    # Does the user want to get text? (any non-None value enables it)
    get_text = kwargs.get('get_text') is not None

    # Does the user want to capture a certain value?
    regex = kwargs.get('regex')

    # Does the user want to get one or more attributes?
    attributes = kwargs.get('attributes', None)
    if attributes is not None:
        attributes = attributes.split(',')

    # User can pass a parameter like url_param_<name>
    # url_param_page=1,2,3,4,5,6,7,8,9
    paramsets = get_params(kwargs)

    # Each is a dictionary of values
    results = []
    for params in paramsets:

        # Get the page
        results += get_results(url=url,
                               selector=selector,
                               headers=headers,
                               attributes=attributes,
                               params=params,
                               get_text=get_text,
                               regex=regex)

    # No results
    if not results:
        results = None

    return results
Esempio n. 9
0
    def validate(self):
        """Validate the parameters set for the Task.

           Sets self.valid to True, then to False if any entry of
           self.required_params is absent from self.params (an error is
           logged for each missing one). Finally the subclass hook
           self._validate() is called, which may also change self.valid.
        """
        self.valid = True

        # Collect the required parameters that were not provided
        missing = [
            required for required in self.required_params
            if required not in self.params
        ]
        for required in missing:
            bot.error("Missing required parameter: %s" % required)
            self.valid = False

        # Call subclass validation function
        self._validate()
Esempio n. 10
0
    def _write_to_pushgateway(self, result):
        ''' writes data to the pushgateway

           A Gauge named after self.name (with "-" replaced by ":") is
           registered on self.registry, set to the result, and the registry
           is pushed to self.params['url'] under the job "watchme". A failed
           push is logged, not raised.

           Parameters
           ==========
           result: the result object to save
        '''
        g = Gauge(self.name.replace('-', ':'), '', registry=self.registry)
        g.set(result)

        try:
            push_to_gateway(self.params['url'], job='watchme', registry=self.registry)

        # Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not swallowed
        except Exception:
            bot.error('An exception occurred while trying to export data using %s' % self.name)
Esempio n. 11
0
def mkdir_p(path):
    """mkdir_p attempts to get the same functionality as mkdir -p

    An already existing directory is not an error. Any other failure
    is logged and the process exits with status 1.

    Parameters
    ==========
    path: the path to create.
    """
    try:
        os.makedirs(path)
    except OSError as error:
        # The only tolerated failure is "already exists, and is a folder"
        already_present = error.errno == errno.EEXIST and os.path.isdir(path)
        if not already_present:
            bot.error("Error creating path %s, exiting." % path)
            sys.exit(1)
Esempio n. 12
0
def _get_pid(name):
    '''used to get the pid of a process, by name

       Runs "pidof <name>". When several pids are reported a warning is
       logged and the first one listed is used. Returns the pid as an
       integer, or None (after logging an error) when the command fails.

       Parameters
       ==========
       name: the name of the process to get.
    '''
    try:
        output = check_output(["pidof", name])
    except CalledProcessError:
        bot.error("%s does not exist." % name)
        return None

    # Output is a single line of space separated pids
    found = output.decode('utf-8').strip('\n').split(' ')
    if len(found) > 1:
        bot.warning("More than one pid found for %s, using first." % name)
    return int(found[0])
Esempio n. 13
0
def getenv(variable_key, default=None, required=False, silent=True):
    """attempt to get an environment variable. If the variable
    is not found, the default (None unless given) is returned.

    Parameters
    ==========
    variable_key: the variable name
    default: value to use when the variable is not set
    required: exit with error if the resulting value is None
    silent: Do not print debugging information for variable
    """
    value = os.environ.get(variable_key, default)
    missing = value is None

    # A required variable without any value is fatal
    if missing and required:
        bot.error("Cannot find environment variable %s, exiting." %
                  variable_key)
        sys.exit(1)

    # Only report values that exist, and only when asked to
    if not (silent or missing):
        bot.verbose("%s found as %s" % (variable_key, value))

    return value
Esempio n. 14
0
    def _get_params_dict(self, pairs):
        '''iterate through parameters, make keys lowercase, and ensure
           valid format.

           Parameters
           ==========
           pairs: a list of key@value pairs to set.
        '''
        params = {}
        for pair in pairs:
            if "@" not in pair:
                bot.exit('incorrectly formatted param, must be key@value')
            key, value = pair.split('@', 1)
            key = key.lower()

            # All tasks are not allowed to have default params
            if key in WATCHME_NOTALLOWED_PARAMS:
                bot.error('%s is a default, not allowed setting by task.' % key)
                self.valid = False
            params[key] = value
        return params
Esempio n. 15
0
def post_task(url, **kwargs):
    '''a simple task to use requests to post to. By default, we return json.

       One POST is issued per json parameter set. Each successful (status
       200) response is collected, as parsed json when save_as is "json"
       (the default) and as text otherwise. Non-200 responses are logged
       and skipped.

       Parameters
       ==========

       REQUIRED:
           url: a url to post to

       Returns
       =======
       a list with one entry per successful response, or None if there
       were none.
    '''
    results = []

    # The json params can vary, but headers do not
    jsonlist = get_params(kwargs, key='json_param_')
    headers = get_headers(kwargs)

    # The output format is the same for every request
    save_as = kwargs.get('save_as', 'json')

    # Loop through lists of json and headers
    for params in jsonlist:

        # Get the post response and proceed if successful
        response = requests.post(url, json=params, headers=headers)
        if response.status_code != 200:
            bot.error("%s: %s" % (response.status_code, response.reason))
            continue

        # Returning the result as json will detect dictionary, and save json
        if save_as == "json":
            result = response.json()

        # Otherwise, we return text
        else:
            result = response.text

        # Keep the result (previously it was computed and then dropped,
        # so the function always returned None)
        results.append(result)

    # Return None if no results found
    if not results:
        results = None

    return results
Esempio n. 16
0
def post_task(url, **kwargs):
    """a simple task to use requests to post to. By default, we return json.

       One POST is issued per json parameter set; every status 200
       response is handed to parse_success_response and the truthy
       outcomes are returned as a list (None if there are none). Other
       status codes are logged and skipped.

       Parameters
       ==========

       REQUIRED:
           url: a url to post to
    """
    # The json params can vary, but headers do not
    jsonlist = get_params(kwargs, key="json_param_")
    headers = get_headers(kwargs)

    parsed = []
    for params in jsonlist:
        response = requests.post(url, json=params, headers=headers)

        # Anything but a 200 is reported and skipped
        if response.status_code != 200:
            bot.error("%s: %s" % (response.status_code, response.reason))
            continue

        # Parse the response per the user's request
        parsed.append(parse_success_response(response, kwargs))

    # Drop empty outcomes; nothing left means None
    kept = [item for item in parsed if item]
    return kept or None
Esempio n. 17
0
    def write_results(self, result, repo):
        '''an entrypoint function for a general task. By default, we parse
           results based on the result type. Any particular subclass of the
           TaskBase can modify or extend these functions.

           Parameters
           ==========
           result: the result object to parse
           repo: the repo base (watcher.repo)

           Returns
           =======
           the list of truthy values returned by the save helpers, or
           whatever self._write_results returns when that hook handles an
           otherwise unsupported result type.
        '''
        files = []

        # Case 1. The result is a list
        if isinstance(result, list):

            # Get rid of Nones, if the user accidentally added
            result = [r for r in result if r]

            if not result:
                bot.error('%s returned empty list of results.' % self.name)

            # multiple jsons save specified, regardless
            elif self.params.get('save_as') == 'jsons':
                bot.debug('Saving single list as multiple json...')
                files += self._save_json_list(result, repo)

            # json output is specified by the user or we find dict results
            elif self.params.get('save_as') == 'json' or isinstance(
                    result[0], dict):
                bot.debug('Saving single list as one json...')
                files.append(self._save_json(result, repo))

            # Otherwise, sniff for list of paths
            elif os.path.exists(result[0]):
                bot.debug('Found list of paths...')
                files += self._save_files_list(result, repo)

            # Finally, assume just writing text to file
            else:
                bot.debug('Saving content from list to file...')
                files += self._save_text_list(result, repo)

        # Case 2. The result is a string
        elif isinstance(result, str):

            # if it's a path to a file, just save to repository
            if os.path.exists(result):
                files.append(self._save_file(result, repo))

            # Otherwise, it's a string that needs to be saved to file
            else:
                files.append(self._save_text(result, repo))

        # Case 3. The result is a dictionary
        elif isinstance(result, dict):
            files.append(self._save_json(result, repo))

        # Identity check: "== None" would dispatch to the result's own
        # __eq__, which arbitrary result objects may override
        elif result is None:
            bot.error('Result for task %s is None' % self.name)

        elif hasattr(self, '_write_results'):
            return self._write_results(result)

        else:
            bot.error('Unsupported result format %s' % type(result))

        # Get rid of None results (don't check excessively for None above)
        files = [f for f in files if f]
        return files
Esempio n. 18
0
    def run(self, funcs, tasks):
        '''run will send a list of tasks, a tuple with arguments, through a function.
           the arguments should be ordered correctly.

           Parameters
           ==========
           funcs: the functions to run with multiprocessing.pool, a dictionary
                  with lookup by the task name
           tasks: a dict of tasks, each task name (key) with a
                  tuple of arguments to process

           Returns
           =======
           a dictionary of finished results keyed by task name (possibly
           partial if an exception was logged), or None when there are no
           tasks. Exits with status 1 on KeyboardInterrupt / SystemExit.
        '''
        # Number of tasks must == number of functions
        assert (len(funcs) == len(tasks))

        # Keep track of some progress for the user
        progress = 1
        total = len(tasks)

        # if we don't have tasks, don't run
        if len(tasks) == 0:
            return

        # results will also have the same key to look up
        finished = dict()
        results = []

        # Bound before the try so the handlers can tell whether a pool
        # was actually created
        pool = None

        try:
            prefix = "[%s/%s]" % (progress, total)
            if self.show_progress:
                bot.show_progress(0, total, length=35, prefix=prefix)
            pool = multiprocessing.Pool(self.workers, init_worker)

            self.start()
            for key, params in tasks.items():
                func = funcs[key]
                if not self.show_progress:
                    bot.info('Processing task %s:%s' % (key, params))
                result = pool.apply_async(multi_wrapper,
                                          multi_package(func, [params]))

                # Store the key with the result
                results.append((key, result))

            # Collect results, most recently submitted first
            while len(results) > 0:
                pair = results.pop()
                key, result = pair
                result.wait()
                if self.show_progress:
                    bot.show_progress(progress,
                                      total,
                                      length=35,
                                      prefix=prefix)
                progress += 1
                prefix = "[%s/%s]" % (progress, total)
                finished[key] = result.get()

            self.end()
            pool.close()
            pool.join()

        except (KeyboardInterrupt, SystemExit):
            bot.error("Keyboard interrupt detected, terminating workers!")
            if pool is not None:
                pool.terminate()
            sys.exit(1)

        except Exception as e:
            bot.error(e)

            # Do not leave worker processes behind after a failure
            if pool is not None:
                pool.terminate()
                pool.join()

        return finished
Esempio n. 19
0
    def finish_runs(self, results):
        '''finish runs should take a dictionary of results, with keys as the
           folder name, and for each, depending on the result type,
           write the result to file (or update file) and then commit
           to git.

           Parameters
           ==========
           results: a dictionary of tasks, with keys as the task name, and
                    values as the result.
        '''
        for name, result in results.items():
            task_folder = os.path.join(self.repo, name)
            task = self.get_task(name, save=True)

            # Files to be added via Git after
            files = []

            # Ensure that the task folder exists
            if not os.path.exists(task_folder):
                mkdir_p(task_folder)
                git_add(self.repo, task_folder)

            # Case 1. The result is a list
            if isinstance(result, list):
                # Get rid of Nones, if the user accidentally added
                result = [r for r in result if r]

                if not result:
                    bot.error('%s returned empty list of results.' % name)

                # multiple jsons save specified (this branch used to test
                # for 'json' a second time and could never be reached)
                elif task.params.get('save_as') == 'jsons':
                    bot.debug('Saving single list as multiple json...')
                    files += task._save_json_list(result, self.repo)

                # json output is specified, or we find dict results (a dict
                # cannot be handed to os.path.exists below)
                elif task.params.get('save_as') == 'json' or isinstance(
                        result[0], dict):
                    bot.debug('Saving single list as one json...')
                    files.append(task._save_json(result, self.repo))

                # Otherwise, sniff for list of paths
                elif os.path.exists(result[0]):
                    bot.debug('Found list of paths...')
                    files += task._save_files_list(result, self.repo)

                # Finally, assume just writing text to file
                else:
                    bot.debug('Saving content from list to file...')
                    files += task._save_text_list(result, self.repo)

            # Case 2. The result is a string
            elif isinstance(result, str):
                # if it's a path to a file, just save to repository
                if os.path.exists(result):
                    files.append(task._save_file(result, self.repo))

                # Otherwise, it's a string that needs to be saved to file
                else:
                    files.append(task._save_text(result, self.repo))

            # Case 3. The result is a dictionary
            elif isinstance(result, dict):
                files.append(task._save_json(result, self.repo))

            elif result is None:
                bot.error('Result for task %s is None' % name)

            else:
                bot.error('Unsupported result format %s' % type(result))

            # Get rid of None results (don't check excessively for None above)
            files = [f for f in files if f]

            # Add files to git, and commit
            files.append(write_timestamp(repo=self.repo, task=name))
            git_add(repo=self.repo, files=files)
            git_commit(repo=self.repo,
                       task=self.name,
                       message="ADD results %s" % name)
Esempio n. 20
0
    def write_results(self, result, repo):
        """an entrypoint function for a general task. By default, we parse
           results based on the result type. Any particular subclass of the
           TaskBase can modify or extend these functions.

           Parameters
           ==========
           result: the result object to parse
           repo: the repo base (watcher.repo)

           Returns
           =======
           the list of truthy values returned by the save helpers, or
           whatever self._write_results returns when that hook handles an
           otherwise unsupported result type.
        """
        files = []

        # Case 1. The result is a list
        if isinstance(result, list):

            # Get rid of Nones, if the user accidentally added
            result = [r for r in result if r]

            if not result:
                bot.error("%s returned empty list of results." % self.name)

            # multiple jsons save specified, regardless
            elif self.params.get("save_as") == "jsons":
                bot.debug("Saving single list as multiple json...")
                files += self._save_json_list(result, repo)

            # json output is specified by the user or we find dict results
            elif self.params.get("save_as") == "json" or isinstance(
                    result[0], dict):
                bot.debug("Saving single list as one json...")
                files.append(self._save_json(result, repo))

            # Otherwise, sniff for list of paths
            elif os.path.exists(result[0]):
                bot.debug("Found list of paths...")
                files += self._save_files_list(result, repo)

            # Finally, assume just writing text to file
            else:
                bot.debug("Saving content from list to file...")
                files += self._save_text_list(result, repo)

        # Case 2. The result is a string
        elif isinstance(result, str):
            files = self._save_str_result(files, result, repo)

        # Case 3. The result is a dictionary
        elif isinstance(result, dict):
            files.append(self._save_json(result, repo))

        elif result is None:
            bot.error("Result for task %s is None" % self.name)

        elif hasattr(self, "_write_results"):
            return self._write_results(result)

        # If it's unicode, try encoding, and then fail (repetitive)
        else:
            try:
                result = result.encode("utf-8")
                files = self._save_str_result(files, result, repo)

            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate
            except Exception:
                bot.error("Unsupported result format %s" % type(result))

        # Get rid of None results (don't check excessively for None above)
        files = [f for f in files if f]
        return files