Example #1
    def checkout(dbapi_connection, connection_record, connection_proxy):
        """Checkout sub-processes connection for sub-processing if needed.

            Checkout is called when a connection is retrieved from the Pool.

        Args:
            dbapi_connection: A SQLAlchemy DBAPI connection.
            connection_record: The SQLAlchemy _ConnectionRecord managing the
                DBAPI connection.
            connection_proxy: The SQLAlchemy _ConnectionFairy object which
                will proxy the public interface of the DBAPI connection for the
                lifespan of the checkout.

        Returns:
            None

        """
        # Get PID of the current process
        pid = os.getpid()

        # Detect if this is a sub-process
        if connection_record.info['pid'] != pid:
            # substitute log.debug() or similar here as desired
            log_message = ('''\
Parent process {} forked ({}) with an open database connection, \
which is being discarded and recreated.\
'''.format(connection_record.info['pid'], pid))
            log.log2debug(20073, log_message)

            connection_record.connection = connection_proxy.connection = None
            raise exc.DisconnectionError('''\
Connection record belongs to pid {}, attempting to check out in pid {}\
'''.format(connection_record.info['pid'], pid))
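
A minimal sketch of how a handler like this is typically attached, based on the standard SQLAlchemy multiprocessing recipe (the engine URL is a placeholder, and the 'connect' seeding of connection_record.info['pid'] is an assumption, not shown in the source above):

import os

from sqlalchemy import create_engine, event

engine = create_engine('sqlite://')  # placeholder engine for illustration

@event.listens_for(engine, 'connect')
def _connect(dbapi_connection, connection_record):
    # Record the PID that created the connection so checkout() can
    # detect that the process has forked since then
    connection_record.info['pid'] = os.getpid()

# checkout() above is attached the same way:
# event.listens_for(engine, 'checkout')(checkout)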
Example #2
    def ingest(self):
        """Ingest cache data into the database.

        Args:
            None

        Returns:
            records: Number of records processed

        """
        # Process
        _data = self.records()
        if bool(_data) is True:
            # Log
            log_message = ('''\
Processing ingest cache files. Batch ID: {}'''.format(self._batch_id))
            log.log2debug(20004, log_message)

            # Add records to the database
            _records = Records(_data)
            _records.ingest()
            self.purge()

            # Log
            log_message = ('''\
Finished processing ingest cache files. Batch ID: {}'''.format(self._batch_id))
            log.log2debug(20117, log_message)

        # Determine the number of key pairs read
        records = 0
        for item in _data:
            records += len(item)
        return records
Example #3
def _log(agent_program, identifier):
    """Create a standardized log message for posting.

    Args:
        agent_program: Agent program name
        identifier: Unique identifier for the source of the data. (AgentID)

    Returns:
        None

    """
    # Log message that ties the identifier to an agent_program
    log_message = ('''\
Agent program {} posting data as {}'''.format(agent_program, identifier))
    log.log2debug(1038, log_message)
Example #4
def read_json_file(filepath, die=True):
    """Read the contents of a YAML file.

    Args:
        filepath: Path to file to be read
        die: Die if there is an error

    Returns:
        result: Dict of JSON read

    """
    # Read file
    if filepath.endswith('.json'):
        try:
            with open(filepath, 'r') as file_handle:
                result = json.load(file_handle)
        except Exception:
            log_message = ('''\
Error reading file {}. Check permissions, existence and file syntax.\
'''.format(filepath))
            if bool(die) is True:
                log.log2die_safe(1012, log_message)
            else:
                log.log2debug(1013, log_message)
                return {}

    else:
        # Die if not a JSON file
        log_message = '{} is not a JSON file.'.format(filepath)
        if bool(die) is True:
            log.log2die_safe(1010, log_message)
        else:
            log.log2debug(1011, log_message)
            return {}

    # Return
    return result
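
A short usage sketch (the path is a placeholder); with die=False the function returns an empty dict on failure instead of terminating:

result = read_json_file('/tmp/example.json', die=False)
if bool(result) is True:
    print('Read {} top-level keys'.format(len(result)))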
Example #5
    def multiprocess_data(self):
        """Insert rows into the Data and DataPoint tables as necessary.

        Do all multiprocessing outside of the class for consistent results,
        without unexpected hangs waiting for pool.join() to complete.

        Args:
            None

        Returns:
            None

        """
        # Initialize key variables
        pattoo_db_records_lists_tuple = self._arguments
        pool_size = self._pool_size

        # Troubleshooting log
        log_message = 'Processing {} agents from cache'.format(
            len(pattoo_db_records_lists_tuple))
        log.log2debug(20009, log_message)

        # Create a pool of sub process resources
        with get_context('spawn').Pool(processes=pool_size) as pool:

            # Create sub processes from the pool
            results = pool.starmap(_process_data_exception,
                                   pattoo_db_records_lists_tuple)

        # Wait for the pool's worker processes to exit. The 'with' block
        # has already terminated the pool; starmap() returned the results.
        pool.join()

        # Test for exceptions
        for result in results:
            if isinstance(result, ExceptionWrapper):
                result.re_raise()
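
_process_data_exception() and ExceptionWrapper are defined elsewhere in pattoo. Here is a minimal sketch of the pattern they implement, with assumed names and fields: each worker traps its own exceptions and returns them as values, so the parent can re-raise them after the pool is done.

class ExceptionWrapper():
    """Carry a worker-process exception back to the parent (sketch only)."""

    def __init__(self, error):
        self.error = error

    def re_raise(self):
        # Raise the original exception in the parent process
        raise self.error


def _process_data_exception(*args):
    """Run the real worker, returning any exception instead of raising."""
    try:
        return _process_data(*args)  # hypothetical worker function
    except Exception as error:
        return ExceptionWrapper(error)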
Example #6
def read_json_files(_directory, die=True, age=0, count=None):
    """Read the contents of all JSON files in a directory.

    Args:
        _directory: Directory with JSON files
        die: Die if there is an error
        age: Minimum age of files in seconds
        count: Return only the first X sorted filenames if not None

    Returns:
        result: sorted list of tuples containing JSON read from each file and
            filepath. Sorting is important as it causes the files with the
            older timestamp names to be processed first. This allows the
            last_timestamp column to be incrementally processed versus some
            unexpected order. [(filepath, JSON), (filepath, JSON) ...]

    """
    # Initialize key variables
    json_found = False
    result = []
    processed = 0

    # Set age
    try:
        age = float(age)
    except (TypeError, ValueError):
        age = 0

    # Verify directory
    if os.path.isdir(_directory) is False:
        log_message = 'Directory "{}" doesn\'t exist!'.format(_directory)
        log.log2die(1009, log_message)

    # Cycle through list of files in directory
    for filename in sorted(os.listdir(_directory)):
        # Examine all the '.json' files in directory
        if filename.endswith('.json'):
            # Read file and add to tuple list
            filepath = '{}{}{}'.format(_directory, os.sep, filename)
            fileage = time.time() - os.stat(filepath).st_mtime
            if fileage > age:
                _data = read_json_file(filepath, die=die)
                if bool(_data) is True:
                    # JSON files found
                    json_found = True
                    result.append((filepath, _data))
                else:
                    # Ignore, don't update 'processed' value
                    log_message = ('''\
Error reading file {}. Ignoring.'''.format(filepath))
                    log.log2debug(1053, log_message)
                    continue

            # Stop if necessary
            processed += 1
            if bool(count) is True:
                if processed == count:
                    break

    # Verify JSON files found in directory. We cannot use logging as it
    # requires a logfile location from the configuration directory to work
    # properly
    if (json_found is False) and (bool(die) is True):
        log_message = (
            'No valid JSON files found in directory "{}" with ".json" '
            'extension.'.format(_directory))
        log.log2die_safe(1060, log_message)

    # Return
    result.sort()
    return result
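
A usage sketch with placeholder values: process files older than 10 seconds, at most 5 per call.

for filepath, json_data in read_json_files(
        '/tmp/pattoo-cache', die=False, age=10, count=5):
    print(filepath, len(json_data))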
Example #7
def read_yaml_file(filepath, as_string=False, die=True):
    """Read the contents of a YAML file.

    Args:
        filepath: Path to file to be read
        as_string: Return a string if True
        die: Die if there is an error

    Returns:
        result: Dict of YAML read (or its string contents if as_string
            is True)

    """
    # Initialize key variables
    if as_string is False:
        result = {}
    else:
        result = ''

    # Read file
    if filepath.endswith('.yaml'):
        try:
            with open(filepath, 'r') as file_handle:
                yaml_from_file = file_handle.read()
        except Exception:
            log_message = ('Error reading file {}. Check permissions, '
                           'existence and file syntax.'
                           ''.format(filepath))
            if bool(die) is True:
                log.log2die_safe(1006, log_message)
            else:
                log.log2debug(1014, log_message)
                return {}

        # Get result
        if as_string is False:
            try:
                result = yaml.safe_load(yaml_from_file)
            except Exception:
                log_message = ('Error reading file {}. Check permissions, '
                               'existence and file syntax.'
                               ''.format(filepath))
                if bool(die) is True:
                    log.log2die_safe(1001, log_message)
                else:
                    log.log2debug(1002, log_message)
                    return {}
        else:
            result = yaml_from_file

    else:
        # Die if not a YAML file
        log_message = '{} is not a YAML file.'.format(filepath)
        if bool(die) is True:
            log.log2die_safe(1065, log_message)
        else:
            log.log2debug(1005, log_message)
            if bool(as_string) is False:
                return {}
            else:
                return ''

    # Return
    return result
Example #8
def process_db_records(pattoo_db_records):
    """Insert all data values for an agent into database.

    Args:
        pattoo_db_records: List of dicts read from cache files.

    Returns:
        None

    Method:
        1) Get all the idx_datapoint and idx_pair values that exist in the
           PattooDBrecord data from the database. All the records MUST be
           from the same source.
        2) Add these idx values to tracking memory variables for speedy lookup
        3) Ignore non numeric data values sent
        4) Add data to the database. If new checksum values are found in the
           PattooDBrecord data, then add the new index values to the
           database, updating the tracking memory variables beforehand.

    """
    # Initialize key variables
    _data = {}

    # Return if there is nothing to process
    if bool(pattoo_db_records) is False:
        return

    # Get DataPoint.idx_datapoint and idx_pair values from db. This is used to
    # speed up the process by reducing the need for future database access.
    agent_id = pattoo_db_records[0].pattoo_agent_id
    checksum_table = misc.agent_checksums(agent_id)

    # Process data
    for pdbr in pattoo_db_records:
        # We only want to insert non-string, non-None values
        if pdbr.pattoo_data_type in [DATA_NONE, DATA_STRING]:
            continue

        # Try to make the value a float for insertion into the database
        try:
            float_value = float(pdbr.pattoo_value)
        except (TypeError, ValueError):
            continue

        # Get the idx_datapoint value for the PattooDBrecord
        if pdbr.pattoo_checksum in checksum_table:
            # Get last_timestamp for existing idx_datapoint entry
            idx_datapoint = checksum_table[pdbr.pattoo_checksum].idx_datapoint
        else:
            # Entry not in database. Update the database and get the
            # required idx_datapoint
            idx_datapoint = datapoint.idx_datapoint(pdbr)
            if bool(idx_datapoint) is True:
                # Update the lookup table
                checksum_table[pdbr.pattoo_checksum] = ChecksumLookup(
                    idx_datapoint=idx_datapoint,
                    polling_interval=int(pdbr.pattoo_agent_polling_interval),
                    last_timestamp=1)

                # Update the Glue table
                idx_pairs = get.pairs(pdbr)
                glue.insert_rows(idx_datapoint, idx_pairs)
            else:
                continue

        # Append item to items
        if pdbr.pattoo_timestamp > checksum_table[
                pdbr.pattoo_checksum].last_timestamp:
            '''
            Add the Data table results to a dict in case we have duplicate
            posting over the API. We need to key off a unique time dependent
            value per datapoint to prevent different datapoints at the same
            point in time overwriting the value. This is specifically for
            removing duplicates for the _SAME_ datapoint at the same point in
            time as could possibly occur with the restart of an agent causing a
            double posting or network issues. We therefore use a tuple of
            idx_datapoint and timestamp.
            '''
            _data[(pdbr.pattoo_timestamp, idx_datapoint)] = IDXTimestampValue(
                idx_datapoint=idx_datapoint,
                polling_interval=int(pdbr.pattoo_agent_polling_interval),
                timestamp=pdbr.pattoo_timestamp,
                value=float_value)

    # Update the data table
    if bool(_data) is True:
        data.insert_rows(list(_data.values()))

    # Log message
    log_message = ('''\
Finished cache data processing for agent_id: {}'''.format(agent_id))
    log.log2debug(20113, log_message)
Example #9
def post(url, data, identifier, save=True):
    """Post data to central server.

    Args:
        url: URL to receive posted data
        data: Data dict to post. Nothing is posted if None or empty.
        identifier: Unique identifier for the source of the data. (AgentID)
        save: When True, save data to cache directory if posting fails

    Returns:
        success: True if successful

    """
    # Initialize key variables
    success = False
    response = False

    # Fail if nothing to post
    if isinstance(data, dict) is False or bool(data) is False:
        return success

    # Post data; save to cache if this fails
    try:
        result = requests.post(url, json=data)
        response = True
    except Exception:
        if save is True:
            # Save data to cache
            _save_data(data, identifier)
        else:
            # Proceed normally if there is a failure.
            # This will be logged later
            pass

    # Define success
    if response is True:
        if result.status_code == 200:
            success = True
        else:
            log_message = ('''\
HTTP {} error for identifier "{}" posted to server {}\
'''.format(result.status_code, identifier, url))
            log.log2warning(1017, log_message)
            # Save data to cache, remote webserver isn't working properly
            _save_data(data, identifier)

    # Log message
    if success is True:
        log_message = ('''\
Data for identifier "{}" posted to server {}\
'''.format(identifier, url))
        log.log2debug(1027, log_message)
    else:
        log_message = ('''\
Data for identifier "{}" failed to post to server {}\
'''.format(identifier, url))
        log.log2warning(1028, log_message)

    # Return
    return success
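
Several examples here fall back to _save_data() when posting fails, but its definition is not shown. A hedged sketch of what such a cache writer could look like, assuming a JSON cache directory and a timestamp-prefixed filename scheme like the one read_json_files() sorts on:

import json
import os
import time


def _save_data(data, identifier, cache_directory='/tmp/pattoo-cache'):
    """Sketch only: persist failed postings for later cache ingestion."""
    # A millisecond timestamp prefix keeps directory listings in posting
    # order, matching the sorted() reads in read_json_files() above
    filename = '{}_{}.json'.format(int(time.time() * 1000), identifier)
    filepath = '{}{}{}'.format(cache_directory, os.sep, filename)
    with open(filepath, 'w') as file_handle:
        json.dump(data, file_handle)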
Example #10
    def test_log2debug(self):
        """Testing function log2debug."""
        # Test should not cause script to crash
        log.log2debug(self.code, self.message)
Example #11
def encrypted_post(metadata, save=True):
    """Post encrypted data to the API server.

    First, the data is checked for validity. Secondly, the data
    and agent ID are stored in a dictionary as key-value pairs.
    The dictionary is converted to a string so that it can be
    encrypted. The encrypted data is then paired with a key, as a
    dictionary, marking the data as encrypted. That dictionary is
    converted to a string so it can be posted as JSON. A response
    from the API server indicates whether the data was received
    and decrypted successfully.

    Args:
        metadata: _EncrypedPost object where:
            encryption: encrypt.Encryption object
            session: Requests session object
            symmetric_key: Symmetric key
            encryption_url: API URL to post the data to
            data: Data to post as a dict
            identifier: Agent identifier
        save: If True, save data to cache if API server is inaccessible

    Returns:
        success: True if successful

    """
    # Initialize key variables
    success = False
    status = None

    # Fail if nothing to post
    if isinstance(metadata.data, dict) is False or bool(
            metadata.data) is False:
        return success

    # Prepare data for posting
    data = json.dumps({'data': metadata.data, 'source': metadata.identifier})

    # Symmetrically encrypt data
    encrypted_data = metadata.encryption.sencrypt(data, metadata.symmetric_key)

    # Post data; save to cache if this fails
    try:
        response = metadata.session.post(
            metadata.encryption_url,
            json=json.dumps({'encrypted_data': encrypted_data}))
        status = response.status_code
    except Exception:
        _exception = sys.exc_info()
        log_message = ('Encrypted posting failure')
        log.log2exception(1075, _exception, message=log_message)
        if save is True:
            # Save data to cache
            _save_data(metadata.data, metadata.identifier)
        else:
            # Proceed normally if there is a failure.
            # This will be logged later
            pass

    # Checks if data was posted successfully
    if status == 202:
        log_message = 'Posted to API. Response "{}" from URL: "{}"'.format(
            status, metadata.encryption_url)
        log.log2debug(1059, log_message)

        # The data was accepted successfully
        success = True
    else:
        log_message = 'Error posting. Response "{}" from URL: "{}"'.format(
            status, metadata.encryption_url)
        log.log2warning(1058, log_message)

    return success
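
The shape of the metadata argument is only described in the docstring above. A hedged reconstruction as a plain namedtuple (the real pattoo definition may differ):

from collections import namedtuple

# Field order inferred from the docstring; an assumption, not the
# library's actual definition
_EncrypedPost = namedtuple(
    '_EncrypedPost',
    'encryption session symmetric_key encryption_url data identifier')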
Example #12
def encrypted_post(gpg,
                   symmetric_key,
                   req_session,
                   url,
                   data,
                   identifier,
                   save=True):
    """Post encrypted data to the API server.

    First, the data is checked for validity. Secondly, the data
    and agent ID are stored in a dictionary as key-value pairs.
    The dictionary is converted to a string so that it can be
    encrypted. The encrypted data is then paired with a key, as a
    dictionary, marking the data as encrypted. That dictionary is
    converted to a string so it can be posted as JSON. A response
    from the API server indicates whether the data was received
    and decrypted successfully.

    Args:
        gpg (obj): Pgpier object to accommodate encryption
        symmetric_key (str): Symmetric key used to encrypt data
        req_session (obj): Request session used to remember the session
                           used to communicate with the API server
        url (str): API URL to post the data to
        data (dict): Data to be posted to the API server
        identifier (str): The agent identification
        save (bool): True to save data to cache directory if
                     posting fails

    Returns:
        general_result (bool): True if posting succeeded

    """
    # Initialize key variables
    general_result = False

    # Fail if nothing to post
    if isinstance(data, dict) is False or bool(data) is False:
        return general_result

    # Prepare and encrypt data
    raw_data = {"data": data, "source": identifier}
    # Convert dictionary to string for encryption
    prep_data = json.dumps(raw_data)
    # Symmetrically encrypt data
    encrypted_data = gpg.symmetric_encrypt(prep_data, symmetric_key)
    post_data = {"encrypted_data": encrypted_data}
    post_data = json.dumps(post_data)

    # Post data; save to cache if this fails
    response_code = None
    try:
        response = req_session.post(url, json=post_data)
        response_code = response.status_code
    except Exception as e:
        log_msg = 'Error encountered: >>>{}<<<'.format(e)
        log.log2warning(1075, log_msg)
        if save is True:
            # Save data to cache
            _save_data(data, identifier)
        else:
            # Proceed normally if there is a failure.
            # This will be logged later
            pass

    # Checks if data was posted successfully
    if response_code == 202:
        log_message = ('Posted to API. Response "{}" '
                       'from URL: "{}"'.format(response_code, url))
        log.log2debug(1059, log_message)
        # The data was accepted successfully
        general_result = True
    else:
        log_message = ('Error posting. Response "{}" '
                       'from URL: "{}"'.format(response_code, url))
        log.log2warning(1058, log_message)

    return general_result