def checkout(dbapi_connection, connection_record, connection_proxy):
    """Invalidate database connections inherited across a process fork.

    Checkout is called when a connection is retrieved from the Pool.

    Args:
        dbapi_connection: A SQLAlchemy DBAPI connection.
        connection_record: The SQLAlchemy _ConnectionRecord managing the
            DBAPI connection.
        connection_proxy: The SQLAlchemy _ConnectionFairy object which will
            proxy the public interface of the DBAPI connection for the
            lifespan of the checkout.

    Returns:
        None

    """
    # Get PID of the current process
    pid = os.getpid()

    # Detect whether the connection was created in another (parent) process
    if connection_record.info['pid'] != pid:
        # Substitute log.debug() or similar here as desired
        log_message = ('''\
Parent process {} forked ({}) with an open database connection, \
which is being discarded and recreated.\
'''.format(connection_record.info['pid'], pid))
        log.log2debug(20073, log_message)
        connection_record.connection = connection_proxy.connection = None
        raise exc.DisconnectionError('''\
Connection record belongs to pid {}, attempting to check out in pid {}\
'''.format(connection_record.info['pid'], pid))
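# Example wiring for the listener above: a minimal sketch following
# SQLAlchemy's documented fork-safety recipe. The engine URL is
# illustrative only; the 'connect' handler seeds info['pid'] so that
# checkout() can detect use from a forked child process.
import os

from sqlalchemy import create_engine, event

engine = create_engine('mysql+pymysql://user:password@localhost/pattoo')


@event.listens_for(engine, 'connect')
def connect(dbapi_connection, connection_record):
    # Record the PID that created this connection
    connection_record.info['pid'] = os.getpid()


# Register the checkout() handler defined above
event.listen(engine, 'checkout', checkout)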
def ingest(self):
    """Ingest cache data into the database.

    Args:
        None

    Returns:
        records: Number of records processed

    """
    # Process
    _data = self.records()
    if bool(_data) is True:
        # Log
        log_message = ('''\
Processing ingest cache files. Batch ID: {}'''.format(self._batch_id))
        log.log2debug(20004, log_message)

        # Add records to the database
        _records = Records(_data)
        _records.ingest()
        self.purge()

        # Log
        log_message = ('''\
Finished processing ingest cache files. Batch ID: {}'''.format(
            self._batch_id))
        log.log2debug(20117, log_message)

    # Determine the number of records read
    records = 0
    for item in _data:
        records += len(item)

    return records
def _log(agent_program, identifier): """Create a standardized log message for posting. Args: agent_program: Agent program name identifier: Unique identifier for the source of the data. (AgentID) Returns: None """ # Log message that ties the identifier to an agent_program log_message = ('''\ Agent program {} posting data as {}'''.format(agent_program, identifier)) log.log2debug(1038, log_message)
def read_json_file(filepath, die=True):
    """Read the contents of a JSON file.

    Args:
        filepath: Path to file to be read
        die: Die if there is an error

    Returns:
        result: Dict of JSON read

    """
    # Read file
    if filepath.endswith('.json'):
        try:
            with open(filepath, 'r') as file_handle:
                result = json.load(file_handle)
        except Exception:
            log_message = ('''\
Error reading file {}. Check permissions, existence and file syntax.\
'''.format(filepath))
            if bool(die) is True:
                log.log2die_safe(1012, log_message)
            else:
                log.log2debug(1013, log_message)
                return {}
    else:
        # Die if not a JSON file
        log_message = '{} is not a JSON file.'.format(filepath)
        if bool(die) is True:
            log.log2die_safe(1010, log_message)
        else:
            log.log2debug(1011, log_message)
            return {}

    # Return
    return result
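# Illustrative usage of read_json_file(); the path is hypothetical, and
# die=False makes a failure return an empty dict instead of exiting.
result = read_json_file('/opt/pattoo-cache/data.json', die=False)
if bool(result) is True:
    print('Read {} top-level keys'.format(len(result)))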
def multiprocess_data(self):
    """Insert rows into the Data and DataPoint tables as necessary.

    Do all multiprocessing outside of the class for consistent results
    without unexpected hanging waiting for pool.join() to happen.

    Args:
        None

    Returns:
        None

    """
    # Initialize key variables
    pattoo_db_records_lists_tuple = self._arguments
    pool_size = self._pool_size

    # Troubleshooting log
    log_message = 'Processing {} agents from cache'.format(
        len(pattoo_db_records_lists_tuple))
    log.log2debug(20009, log_message)

    # Create a pool of sub process resources
    with get_context('spawn').Pool(processes=pool_size) as pool:
        # Create sub processes from the pool
        results = pool.starmap(
            _process_data_exception, pattoo_db_records_lists_tuple)

    # Wait for all the processes to end. The context manager has already
    # terminated the pool, so join() is safe here.
    pool.join()

    # Test for exceptions
    for result in results:
        if isinstance(result, ExceptionWrapper):
            result.re_raise()
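# The _process_data_exception() worker is defined elsewhere in the
# codebase. A minimal sketch of what such a wrapper might look like,
# assuming ExceptionWrapper captures sys.exc_info() for re_raise() in
# the parent process; process_db_records() is used as the assumed real
# worker.
import sys


def _process_data_exception(*args):
    """Run the worker, trapping exceptions for re-raising in the parent."""
    try:
        return process_db_records(*args)
    except Exception:
        # Return the wrapped exception so the parent can re-raise it
        return ExceptionWrapper(sys.exc_info())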
def read_json_files(_directory, die=True, age=0, count=None):
    """Read the contents of all JSON files in a directory.

    Args:
        _directory: Directory with JSON files
        die: Die if there is an error
        age: Minimum age of files in seconds
        count: Return the first X number of sorted filenames if not None

    Returns:
        result: sorted list of tuples containing JSON read from each file
            and filepath. Sorting is important as it causes the files with
            the older timestamp names to be processed first. This allows
            the last_timestamp column to be incrementally processed versus
            some unexpected order.
            [(filepath, JSON), (filepath, JSON) ...]

    """
    # Initialize key variables
    json_found = False
    result = []
    processed = 0

    # Set age
    try:
        age = float(age)
    except (TypeError, ValueError):
        age = 0

    # Verify directory
    if os.path.isdir(_directory) is False:
        log_message = 'Directory "{}" doesn\'t exist!'.format(_directory)
        log.log2die(1009, log_message)

    # Cycle through list of files in directory
    for filename in sorted(os.listdir(_directory)):
        # Examine all the '.json' files in directory
        if filename.endswith('.json'):
            # Read file and add to tuple list
            filepath = '{}{}{}'.format(_directory, os.sep, filename)
            fileage = time.time() - os.stat(filepath).st_mtime
            if fileage > age:
                _data = read_json_file(filepath, die=die)
                if bool(_data) is True:
                    # JSON files found
                    json_found = True
                    result.append((filepath, _data))
                else:
                    # Ignore, don't update the 'processed' value
                    log_message = ('''\
Error reading file {}. Ignoring.'''.format(filepath))
                    log.log2debug(1053, log_message)
                    continue

                # Stop if necessary
                processed += 1
                if bool(count) is True:
                    if processed == count:
                        break

    # Verify JSON files found in directory. We cannot use logging as it
    # requires a logfile location from the configuration directory to work
    # properly
    if (json_found is False) and (bool(die) is True):
        log_message = (
            'No valid JSON files found in directory "{}" with ".json" '
            'extension.'.format(_directory))
        log.log2die_safe(1060, log_message)

    # Return
    result.sort()
    return result
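# Illustrative usage of read_json_files(): process up to 100 cache files
# that are at least 10 seconds old. The directory path is hypothetical.
for filepath, json_data in read_json_files(
        '/opt/pattoo-cache', die=False, age=10, count=100):
    print(filepath, len(json_data))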
def read_yaml_file(filepath, as_string=False, die=True):
    """Read the contents of a YAML file.

    Args:
        filepath: Path to file to be read
        as_string: Return a string if True
        die: Die if there is an error

    Returns:
        result: Dict of YAML read, or the file contents as a string if
            as_string is True

    """
    # Initialize key variables
    if as_string is False:
        result = {}
    else:
        result = ''

    # Read file
    if filepath.endswith('.yaml'):
        try:
            with open(filepath, 'r') as file_handle:
                yaml_from_file = file_handle.read()
        except Exception:
            log_message = ('Error reading file {}. Check permissions, '
                           'existence and file syntax.'
                           ''.format(filepath))
            if bool(die) is True:
                log.log2die_safe(1006, log_message)
            else:
                log.log2debug(1014, log_message)
                return result

        # Get result
        if as_string is False:
            try:
                result = yaml.safe_load(yaml_from_file)
            except Exception:
                log_message = ('Error reading file {}. Check permissions, '
                               'existence and file syntax.'
                               ''.format(filepath))
                if bool(die) is True:
                    log.log2die_safe(1001, log_message)
                else:
                    log.log2debug(1002, log_message)
                    return {}
        else:
            result = yaml_from_file

    else:
        # Die if not a YAML file
        log_message = '{} is not a YAML file.'.format(filepath)
        if bool(die) is True:
            log.log2die_safe(1065, log_message)
        else:
            log.log2debug(1005, log_message)
        if bool(as_string) is False:
            return {}
        else:
            return ''

    # Return
    return result
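# Illustrative usage of read_yaml_file(); the path is hypothetical. The
# same file can be returned as a parsed dict or as raw text.
config_dict = read_yaml_file('/etc/pattoo/pattoo.yaml', die=False)
config_text = read_yaml_file(
    '/etc/pattoo/pattoo.yaml', as_string=True, die=False)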
def process_db_records(pattoo_db_records):
    """Insert all data values for an agent into the database.

    Args:
        pattoo_db_records: List of PattooDBrecord objects read from cache
            files.

    Returns:
        None

    Method:
        1) Get all the idx_datapoint and idx_pair values that exist in the
           PattooDBrecord data from the database. All the records MUST be
           from the same source.
        2) Add these idx values to tracking memory variables for speedy
           lookup
        3) Ignore non numeric data values sent
        4) Add data to the database. If new checksum values are found in
           the PattooDBrecord data, then add the new index values to the
           database, updating the tracking memory variables beforehand.

    """
    # Initialize key variables
    _data = {}

    # Return if there is nothing to process
    if bool(pattoo_db_records) is False:
        return

    # Get DataPoint.idx_datapoint and idx_pair values from db. This is used
    # to speed up the process by reducing the need for future database
    # access.
    agent_id = pattoo_db_records[0].pattoo_agent_id
    checksum_table = misc.agent_checksums(agent_id)

    # Process data
    for pdbr in pattoo_db_records:
        # We only want to insert non-string, non-None values
        if pdbr.pattoo_data_type in [DATA_NONE, DATA_STRING]:
            continue

        # Try to make the value a float for insertion into the database
        try:
            float_value = float(pdbr.pattoo_value)
        except (TypeError, ValueError):
            continue

        # Get the idx_datapoint value for the PattooDBrecord
        if pdbr.pattoo_checksum in checksum_table:
            # Get last_timestamp for existing idx_datapoint entry
            idx_datapoint = checksum_table[
                pdbr.pattoo_checksum].idx_datapoint
        else:
            # Entry not in database. Update the database and get the
            # required idx_datapoint
            idx_datapoint = datapoint.idx_datapoint(pdbr)
            if bool(idx_datapoint) is True:
                # Update the lookup table
                checksum_table[pdbr.pattoo_checksum] = ChecksumLookup(
                    idx_datapoint=idx_datapoint,
                    polling_interval=int(pdbr.pattoo_agent_polling_interval),
                    last_timestamp=1)

                # Update the Glue table
                idx_pairs = get.pairs(pdbr)
                glue.insert_rows(idx_datapoint, idx_pairs)
            else:
                continue

        # Append item to items
        if pdbr.pattoo_timestamp > checksum_table[
                pdbr.pattoo_checksum].last_timestamp:
            '''
            Add the Data table results to a dict in case we have duplicate
            posting over the API. We need to key off a unique time dependent
            value per datapoint to prevent different datapoints at the same
            point in time overwriting the value. This is specifically for
            removing duplicates for the _SAME_ datapoint at the same point
            in time as could possibly occur with the restart of an agent
            causing a double posting or network issues. We therefore use a
            tuple of idx_datapoint and timestamp.
            '''
            _data[(pdbr.pattoo_timestamp, idx_datapoint)] = IDXTimestampValue(
                idx_datapoint=idx_datapoint,
                polling_interval=int(pdbr.pattoo_agent_polling_interval),
                timestamp=pdbr.pattoo_timestamp,
                value=float_value)

    # Update the data table
    if bool(_data) is True:
        data.insert_rows(list(_data.values()))

    # Log message
    log_message = ('''\
Finished cache data processing for agent_id: {}'''.format(agent_id))
    log.log2debug(20113, log_message)
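# Toy illustration of the (timestamp, idx_datapoint) keying used above:
# a duplicate post for the same datapoint at the same time overwrites
# the earlier dict entry instead of creating a second row. All values
# are made up.
_data = {}
_data[(1600000000, 42)] = 10.0    # First posting
_data[(1600000000, 42)] = 10.0    # Duplicate posting: overwritten, not added
_data[(1600000000, 43)] = 99.9    # Different datapoint, same time: kept
assert len(_data) == 2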
def post(url, data, identifier, save=True):
    """Post data to central server.

    Args:
        url: URL to receive posted data
        data: Data dict to post
        identifier: Unique identifier for the source of the data. (AgentID)
        save: When True, save data to the cache directory if posting fails

    Returns:
        success: True if successful

    """
    # Initialize key variables
    success = False
    response = False

    # Fail if nothing to post
    if isinstance(data, dict) is False or bool(data) is False:
        return success

    # Post data. Save to cache if this fails
    try:
        result = requests.post(url, json=data)
        response = True
    except Exception:
        if save is True:
            # Save data to cache
            _save_data(data, identifier)
        else:
            # Proceed normally if there is a failure.
            # This will be logged later
            pass

    # Define success
    if response is True:
        if result.status_code == 200:
            success = True
        else:
            log_message = ('''\
HTTP {} error for identifier "{}" posted to server {}\
'''.format(result.status_code, identifier, url))
            log.log2warning(1017, log_message)

            # Save data to cache, the remote webserver isn't working properly
            _save_data(data, identifier)

    # Log message
    if success is True:
        log_message = ('''\
Data for identifier "{}" posted to server {}\
'''.format(identifier, url))
        log.log2debug(1027, log_message)
    else:
        log_message = ('''\
Data for identifier "{}" failed to post to server {}\
'''.format(identifier, url))
        log.log2warning(1028, log_message)

    # Return
    return success
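# Illustrative usage of post(); the URL, payload and identifier are all
# hypothetical. On failure the data is cached by _save_data() for a
# later retry.
success = post(
    'http://localhost:20202/pattoo/api/v1/agent/receive/abc123',
    {'pattoo_agent_id': 'abc123', 'datapoints': []},
    'abc123')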
def test_log2debug(self): """Testing function log2debug.""" # Test should not cause script to crash log.log2debug(self.code, self.message)
def encrypted_post(metadata, save=True):
    """Post encrypted data to the API server.

    First, the data is checked for its validity. Secondly, the data and
    agent ID are stored in a dictionary as key-value pairs. The dictionary
    is converted to a string so that it can be encrypted. The encrypted
    data is then paired with a key, as a dictionary, distinguishing the
    data as encrypted. The dictionary is then converted to a string so it
    can be added to the request method as JSON. A response from the API
    server tells if the data was received and decrypted successfully.

    Args:
        metadata: _EncrypedPost object where:
            encryption: encrypt.Encryption object
            session: Requests session object
            symmetric_key: Symmetric key
            encryption_url: API URL to post the data to
            data: Data to post as a dict
            identifier: Agent identifier
        save: If True, save data to cache if the API server is inaccessible

    Returns:
        success: True if successful

    """
    # Initialize key variables
    success = False
    status = None

    # Fail if nothing to post
    if isinstance(metadata.data, dict) is False or bool(
            metadata.data) is False:
        return success

    # Prepare data for posting
    data = json.dumps({'data': metadata.data, 'source': metadata.identifier})

    # Symmetrically encrypt data
    encrypted_data = metadata.encryption.sencrypt(
        data, metadata.symmetric_key)

    # Post data. Save to cache if this fails
    try:
        response = metadata.session.post(
            metadata.encryption_url,
            json=json.dumps({'encrypted_data': encrypted_data}))
        status = response.status_code
    except Exception:
        _exception = sys.exc_info()
        log_message = 'Encrypted posting failure'
        log.log2exception(1075, _exception, message=log_message)
        if save is True:
            # Save data to cache
            _save_data(metadata.data, metadata.identifier)
        else:
            # Proceed normally if there is a failure.
            # This will be logged later
            pass

    # Check whether data was posted successfully
    if status == 202:
        log_message = 'Posted to API. Response "{}" from URL: "{}"'.format(
            status, metadata.encryption_url)
        log.log2debug(1059, log_message)

        # The data was accepted successfully
        success = True
    else:
        log_message = 'Error posting. Response "{}" from URL: "{}"'.format(
            status, metadata.encryption_url)
        log.log2warning(1058, log_message)

    return success
def encrypted_post(gpg, symmetric_key, req_session, url, data,
                   identifier, save=True):
    """Post encrypted data to the API server.

    First, the data is checked for its validity. Secondly, the data and
    agent ID are stored in a dictionary as key-value pairs. The dictionary
    is converted to a string so that it can be encrypted. The encrypted
    data is then paired with a key, as a dictionary, distinguishing the
    data as encrypted. The dictionary is then converted to a string so it
    can be added to the request method as JSON. A response from the API
    server tells if the data was received and decrypted successfully.

    Args:
        gpg (obj): Pgpier object to accommodate encryption
        symmetric_key (str): Symmetric key used to encrypt data
        req_session (obj): Request session used to remember the session
            used to communicate with the API server
        url (str): API URL to post the data to
        data (dict): Data to be posted to the API server
        identifier (str): The agent identification
        save (bool): True to save data to the cache directory if posting
            fails

    Returns:
        general_result (bool): True if successful

    """
    # Initialize key variables
    general_result = False

    # Fail if nothing to post
    if isinstance(data, dict) is False or bool(data) is False:
        return general_result

    # Prepare and encrypt data
    raw_data = {'data': data, 'source': identifier}

    # Convert dictionary to string for encryption
    prep_data = json.dumps(raw_data)

    # Symmetrically encrypt data
    encrypted_data = gpg.symmetric_encrypt(prep_data, symmetric_key)
    post_data = {'encrypted_data': encrypted_data}
    post_data = json.dumps(post_data)

    # Post data. Save to cache if this fails
    response_code = None
    try:
        response = req_session.post(url, json=post_data)
        response_code = response.status_code
    except Exception as e:
        log_msg = 'Error encountered: >>>{}<<<'.format(e)
        log.log2warning(1075, log_msg)
        if save is True:
            # Save data to cache
            _save_data(data, identifier)
        else:
            # Proceed normally if there is a failure.
            # This will be logged later
            pass

    # Check whether data was posted successfully
    if response_code == 202:
        log_message = ('Posted to API. Response "{}" '
                       'from URL: "{}"'.format(response_code, url))
        log.log2debug(1059, log_message)

        # The data was accepted successfully
        general_result = True
    else:
        log_message = ('Error posting. Response "{}" '
                       'from URL: "{}"'.format(response_code, url))
        log.log2warning(1058, log_message)

    return general_result