Example #1
0
   def enable_logging(self, result_log_path, runnable_log_path):
      """Causes the extraction runner to keep logs of what it does in rotating log files

      Every handler currently attached to self.result_logger and self.runnable_logger
      is detached and closed, and each logger is given a single
      ParallelTimedRotatingFileHandler (when='D') in its place.
      If this method is not called, by default, info will be printed to stdout and stderr

      Args:
         result_log_path: String file path that indicates where to store the results log.
            This log stores when processing starts and ends for each run. It also notes if any errors occur during the run.
            The handler is created with when='D'; the exact dated file name is decided by the handler
            (presumably something like '/path/to/result.[year]-[month]-[day].log' for '/path/to/result' -- confirm against the handler)
         runnable_log_path: String file path that indicates where to store the runnables log.
            This log contains any log messages that runnables log.
            Path and day rotation is handled the same as with the results log

      Raises:
         OSError: If a log directory does not exist and cannot be created
      """

      result_log_path = utils.expand_path(result_log_path)
      runnable_log_path = utils.expand_path(runnable_log_path)

      for log_path in (result_log_path, runnable_log_path):
         log_dir = os.path.dirname(log_path)
         if not log_dir:
            # Path has no directory component, so there is nothing to create
            continue
         try:
            os.makedirs(log_dir)
         except OSError:
            # Another process may have created the directory between calls;
            # only re-raise when the directory really is missing
            if not os.path.isdir(log_dir):
               raise

      result_log_handler = extraction.ParallelTimedRotatingFileHandler(result_log_path, when='D', delay=True)
      runnable_log_handler = extraction.ParallelTimedRotatingFileHandler(runnable_log_path, when='D', delay=True)

      formatter = logging.Formatter('%(asctime)s: %(message)s')
      result_log_handler.setFormatter(formatter)
      runnable_log_handler.setFormatter(formatter)

      # Detach AND close the old handlers so that calling this method again
      # does not leave previously opened log files dangling.
      # Both loggers are cleared before anything is added, in case they are the same object
      for logger in (self.result_logger, self.runnable_logger):
         for old_handler in list(logger.handlers):
            logger.removeHandler(old_handler)
            old_handler.close()

      self.result_logger.addHandler(result_log_handler)
      self.runnable_logger.addHandler(runnable_log_handler)
Example #2
0
   def enable_logging(self, result_log_path, runnable_log_path):
      """Causes the extraction runner to keep logs of what it does in rotating log files

      Every handler currently attached to self.result_logger and self.runnable_logger
      is detached and closed, and each logger is given a single
      extraction.log.ParallelTimedRotatingFileHandler (when='D') in its place.
      If this method is not called, by default, info will be printed to stdout and stderr

      Args:
         result_log_path: String file path that indicates where to store the results log.
            This log stores when processing starts and ends for each run. It also notes if any errors occur during the run.
            The handler is created with when='D'; the exact dated file name is decided by the handler
            (presumably something like '/path/to/result.[year]-[month]-[day].log' for '/path/to/result' -- confirm against the handler)
         runnable_log_path: String file path that indicates where to store the runnables log.
            This log contains any log messages that runnables log.
            Path and day rotation is handled the same as with the results log

      Raises:
         OSError: If a log directory does not exist and cannot be created
      """

      result_log_path = utils.expand_path(result_log_path)
      runnable_log_path = utils.expand_path(runnable_log_path)

      for log_path in (result_log_path, runnable_log_path):
         log_dir = os.path.dirname(log_path)
         if not log_dir:
            # Path has no directory component, so there is nothing to create
            continue
         try:
            os.makedirs(log_dir)
         except OSError:
            # Another process may have created the directory between calls;
            # only re-raise when the directory really is missing
            if not os.path.isdir(log_dir):
               raise

      result_log_handler = extraction.log.ParallelTimedRotatingFileHandler(result_log_path, when='D', delay=True)
      runnable_log_handler = extraction.log.ParallelTimedRotatingFileHandler(runnable_log_path, when='D', delay=True)

      formatter = logging.Formatter('%(asctime)s: %(message)s')
      result_log_handler.setFormatter(formatter)
      runnable_log_handler.setFormatter(formatter)

      # Detach AND close the old handlers so that calling this method again
      # does not leave previously opened log files dangling.
      # Both loggers are cleared before anything is added, in case they are the same object
      for logger in (self.result_logger, self.runnable_logger):
         for old_handler in list(logger.handlers):
            logger.removeHandler(old_handler)
            old_handler.close()

      self.result_logger.addHandler(result_log_handler)
      self.runnable_logger.addHandler(runnable_log_handler)
Example #3
0
    def run_from_file(self, file_path, output_dir=None, **kwargs):
        """Runs the extractor on the file at file_path

        Reads the file at file_path from disk in binary mode, then passes the
        data to self.run with run_name set to the expanded file path.

        Args:
            file_path: Reads this file and passes its data to the extractors and filters
            output_dir: An optional string that specifies the directory to write the results to
                If this isn't provided, results will be written to the same directory as the file
            **kwargs: Optional keyword arguments, forwarded unchanged to self.run
                write_dep_errors: A Boolean. If True, extractors that fail because dependencies fail
                    will still write a short xml file with this error to disk. (Good for clarity)
                    If False, extractors with failing dependencies won't write anything to disk
                file_prefix: A string to prepend to all filenames that get written to disk

        Returns:
            Whatever self.run returns for this data

        Raises:
            IOError/OSError: If the file cannot be opened or read
        """

        file_path = utils.expand_path(file_path)

        if not output_dir:
            output_dir = os.path.dirname(file_path)

        # Read inside a context manager so the file handle is closed before
        # the (potentially long) extraction run starts, even if reading fails
        with open(file_path, 'rb') as input_file:
            data = input_file.read()

        return self.run(data,
                        output_dir,
                        run_name=file_path,
                        **kwargs)
Example #4
0
   def run_from_file(self, file_path, output_dir=None, **kwargs):
      """Runs the extractor on the file at file_path

      Reads the file at file_path from disk in binary mode, then passes the
      data to self.run with run_name set to the expanded file path.

      Args:
         file_path: Reads this file and passes its data to the extractors and filters
         output_dir: An optional string that specifies the directory to write the results to
            If this isn't provided, results will be written to the same directory as the file
         **kwargs: Optional keyword arguments, forwarded unchanged to self.run
            write_dep_errors: A Boolean. If True, extractors that fail because dependencies fail
               will still write a short xml file with this error to disk. (Good for clarity)
               If False, extractors with failing dependencies won't write anything to disk
            file_prefix: A string to prepend to all filenames that get written to disk

      Returns:
         Whatever self.run returns for this data

      Raises:
         IOError/OSError: If the file cannot be opened or read
      """

      file_path = utils.expand_path(file_path)

      if not output_dir:
         output_dir = os.path.dirname(file_path)

      # Read inside a context manager so the file handle is closed before
      # the (potentially long) extraction run starts, even if reading fails
      with open(file_path, 'rb') as input_file:
         data = input_file.read()

      return self.run(data, output_dir, run_name=file_path, **kwargs)