def _copy_to_replication_location(self, node, timestamp=None):
    """
    Copy a single node from frozen location to replication location.

    As an extra precaution, checksums are re-calculated for files after copy,
    and compared with the checksums of the initial checksum generation phase.

    Note that for efficiency's sake, during repair of a project, the file will
    not be re-copied if a replication already exists and the file size is the
    same for both the frozen file and already replicated file.

    Parameters:
        node: dict describing the file. Read keys: 'pid', 'pathname',
            'checksum', and (only when the copy is skipped) 'frozen' and
            'replicated'. Written keys: 'replicated', '_updated', '_copied',
            and 'checksum' (normalized via self._get_checksum_value).
        timestamp: value stored in node['replicated'] after a successful copy.
            When omitted (None), current_time() is evaluated at call time.

    Raises:
        IOError: if the copy fails for a reason other than a missing path, or
            still fails after the destination directory has been created.
        Exception: if the checksum of the replicated file cannot be generated,
            or does not match the checksum recorded on the node.
    """
    src_path = construct_file_path(self._uida_conf_vars, node)
    dest_path = construct_file_path(self._uida_conf_vars, node, replication=True)

    if os.path.exists(dest_path):
        if os.stat(src_path).st_size == os.stat(dest_path).st_size:
            self._logger.debug('Skipping already replicated file: %s' % dest_path)
            # If the file has no replicated timestamp defined, set it to the frozen timestamp
            if not node.get('replicated', None):
                self._logger.debug(
                    'Fixing missing replicated timestamp: %s' % node['frozen'])
                node['replicated'] = node['frozen']
                node['_updated'] = True
            return

    try:
        shutil.copy(src_path, dest_path)
    except IOError as e:
        # ENOENT(2): file does not exist, raised also on missing dest parent dir
        if e.errno != errno.ENOENT:
            raise
        # Create the destination directory and retry once; if the source itself
        # is missing, the retry raises again and propagates to the caller.
        os.makedirs(os.path.dirname(dest_path), exist_ok=True)
        shutil.copy(src_path, dest_path)

    try:
        replicated_checksum = self._get_file_checksum(dest_path)
    except Exception as e:
        # Chain the original error so the root cause stays in the traceback
        raise Exception(
            'Error generating checksum for file: %s, pathname: %s, error: %s'
            % (node['pid'], node['pathname'], str(e))) from e

    # Remove any sha256: URI prefix
    node['checksum'] = self._get_checksum_value(node['checksum'])

    if node['checksum'] != replicated_checksum:
        raise Exception(
            'Checksum mismatch after replication for file: %s, pathname: %s, frozen_checksum: %s, replicated_checksum: %s'
            % (node['pid'], node['pathname'], node['checksum'], replicated_checksum))

    # Resolve the default here rather than in the signature: a default of
    # current_time() in the signature is evaluated only once, when the function
    # is defined, and would stamp every file with that same stale value.
    node['replicated'] = current_time() if timestamp is None else timestamp
    node['_updated'] = True
    node['_copied'] = True
def create_test_file(uida_conf_vars, test_file_data):
    """
    Write a small text file at the location derived for the given test file data.

    The parent directory is created first when it does not exist yet; an
    already existing directory is not treated as an error. The file content
    is a single sentence naming the file itself.
    """
    target_path = construct_file_path(uida_conf_vars, test_file_data)
    parent_dir = os.path.dirname(target_path)
    base_name = os.path.basename(target_path)

    try:
        os.makedirs(parent_dir)
    except OSError as err:
        # Only "already exists" is tolerated; anything else is a real failure
        already_exists = err.errno == errno.EEXIST
        if not already_exists:
            raise

    content = 'filename is %s' % base_name
    with open(target_path, 'w+') as handle:
        handle.write(content)
def test_copy_to_replication_location(self):
    """
    Verify that copying nodes to the replication location produces real files.
    """
    action = deepcopy(ida_test_data['actions'][5])

    # note - the project identifier is read internally from the rabbitmq message
    self.agent.rabbitmq_message = action

    action_nodes = self.agent._get_nodes_associated_with_action(action)

    # first replicate every node ...
    for action_node in action_nodes:
        self.agent._copy_to_replication_location(action_node)

    # ... then check that each one has a file at its replication path
    for action_node in action_nodes:
        replicated_path = construct_file_path(
            self.agent._uida_conf_vars, action_node, replication=True)
        was_copied = isfile(replicated_path)
        self.assertEqual(was_copied, True, 'copied file does not exist!')
def _process_checksums(self, action):
    """
    Ensure every node associated with the action has a recorded size and checksum.

    If the file size reported for a file differs from the file size on disk,
    or if no file size is recorded for the file, the size on disk is recorded
    and a new checksum is generated from the current file on disk. If no
    checksum is recorded, a new checksum is generated. Nodes that changed are
    flagged with '_updated' and saved via self._save_nodes_to_db.

    The same logic works efficiently both for freeze and repair actions: the
    size on disk is only retrieved when no size is reported or the action is
    a repair.

    Parameters:
        action: dict describing the action; action['action'] is compared
            against 'repair'.

    Returns:
        The nodes returned by self._get_nodes_associated_with_action(action),
        with 'size', 'checksum' and '_updated' set on those that changed.

    Raises:
        SystemExit: if a graceful shutdown has been started.
        Exception: if a checksum cannot be generated for a file.
    """
    self._logger.info('Processing checksums...')

    nodes = self._get_nodes_associated_with_action(action)

    self._logger.debug('Generating checksums...')

    for node in nodes:

        if self._graceful_shutdown_started:
            raise SystemExit

        # Generate local filesystem pathname to file in frozen area
        file_path = construct_file_path(self._uida_conf_vars, node)

        # Assume no updates to either file size or checksum required
        node_updated = False

        # Get reported file size and checksum, if defined. A plain .get() is
        # used instead of a bare except so that SystemExit/KeyboardInterrupt
        # and unrelated errors are never silently swallowed.
        node_size = node.get('size')
        node_checksum = node.get('checksum')

        # If no file size is reported, or we have a repair action, get the actual size on disk
        # Else trust the reported size and avoid the cost of retrieving the size on disk
        if node_size is None or action['action'] == 'repair':
            file_size = os.path.getsize(file_path)
        else:
            file_size = node_size

        # If the reported file size disagrees with the determined file size, record file size
        # on disk and generate and record new checksum
        if node_size != file_size:
            self._logger.debug('Recording both size and checksum for file %s' % node['pid'])
            node_size = file_size
            node_checksum = self._checksum_or_raise(node, file_path)
            node_updated = True

        # If still no checksum, generate and record new checksum
        if node_checksum is None:
            self._logger.debug('Recording checksum for file %s' % node['pid'])
            node_checksum = self._checksum_or_raise(node, file_path)
            node_updated = True

        # If either new file size or new checksum, update node values and flag node as updated
        if node_updated:
            node['size'] = node_size
            node['checksum'] = node_checksum
            node['_updated'] = node_updated

    # Update db records for all updated nodes
    self._save_nodes_to_db(nodes, fields=['checksum', 'size'], updated_only=True)
    self._save_action_completion_timestamp(action, 'checksums')

    self._logger.info('Checksums processing OK')
    return nodes

def _checksum_or_raise(self, node, file_path):
    """
    Generate the checksum of file_path, re-raising any failure with the node's pid and pathname.
    """
    try:
        return self._get_file_checksum(file_path)
    except Exception as e:
        # Chain the original error so the root cause stays in the traceback
        raise Exception(
            'Error generating checksum for file: %s, pathname: %s, error: %s'
            % (node['pid'], node['pathname'], str(e))) from e