Example #1
def get_extra_as_options(input_str):
    if '|' not in input_str:
        raise JobDescriptionError('No frequency and/or time defined')
    metadata = input_str.split('|')[1:]
    if len(metadata) == 1:
        if ':' in metadata[0]:
            frequency = '1d'
            time_ = metadata[0]
        else:
            frequency = metadata[0]
            time_ = None
    else:
        frequency, time_ = metadata

    n = Namespace()
    n.add_option(
        'frequency',
        doc='frequency',
        default=frequency,
        #from_string_converter=int
        exclude_from_print_conf=True,
        exclude_from_dump_conf=True
    )
    n.add_option(
        'time',
        doc='time',
        default=time_,
        exclude_from_print_conf=True,
        exclude_from_dump_conf=True
    )
    return n
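
A quick, hedged sketch of how the parser above behaves (the job-spec strings and class path are made up; it assumes configman Option objects expose their value via .default):
# hypothetical "class|frequency|time" job specs in the style parsed above
opts = get_extra_as_options('myapp.jobs.CleanupJob|1d|02:00')
print(opts.frequency.default, opts.time.default)   # -> 1d 02:00

opts = get_extra_as_options('myapp.jobs.CleanupJob|03:00')  # a time alone implies daily
print(opts.frequency.default, opts.time.default)   # -> 1d 03:00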
Example #2
def logging_required_config(app_name):
    lc = Namespace()
    lc.namespace("logging")
    lc.logging.add_option("syslog_host", doc="syslog hostname", default="localhost")
    lc.logging.add_option("syslog_port", doc="syslog port", default=514)
    lc.logging.add_option(
        "syslog_facility_string", doc='syslog facility string ("user", "local0", etc)', default="user"
    )
    lc.logging.add_option(
        "syslog_line_format_string",
        doc="python logging system format for syslog entries",
        default="%s (pid {process}): " "{asctime} {levelname} - {threadName} - " "{message}" % app_name,
    )
    lc.logging.add_option(
        "syslog_error_logging_level",
        doc="logging level for the log file (10 - DEBUG, 20 " "- INFO, 30 - WARNING, 40 - ERROR, 50 - CRITICAL)",
        default=40,
    )
    lc.logging.add_option(
        "stderr_line_format_string",
        doc="python logging system format for logging to stderr",
        default="{asctime} {levelname} - {threadName} - " "{message}",
    )
    lc.logging.add_option(
        "stderr_error_logging_level",
        doc="logging level for the logging to stderr (10 - "
        "DEBUG, 20 - INFO, 30 - WARNING, 40 - ERROR, "
        "50 - CRITICAL)",
        default=10,
    )
    return lc
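
A minimal sketch of wiring this namespace into a manager (the app name is made up; assumes configman is importable):
from configman import ConfigurationManager

cm = ConfigurationManager(
    [logging_required_config('exampleapp')],
    values_source_list=[],
    argv_source=[],
)
config = cm.get_config()
print(config.logging.syslog_host, config.logging.syslog_port)  # -> localhost 514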
Example #3
    def test_classes_in_namespaces_converter_5(self):
        n = Namespace()
        n.add_option(
            'kls_list',
            default=(
                'socorro.unittest.lib.test_converters.Alpha, '
                'socorro.unittest.lib.test_converters.Alpha, '
                'socorro.unittest.lib.test_converters.Alpha'
            ),
            from_string_converter=str_to_classes_in_namespaces_converter(
                '%(name)s_%(index)02d'
            )
        )

        cm = ConfigurationManager(
            n,
            [{
                'kls_list': (
                    'socorro.unittest.lib.test_converters.Alpha, '
                    'socorro.unittest.lib.test_converters.Beta, '
                    'socorro.unittest.lib.test_converters.Beta, '
                    'socorro.unittest.lib.test_converters.Alpha'
                ),
                'Alpha_00.a': 21,
                'Beta_01.b': 38,
            }]
        )
        config = cm.get_config()

        self.assertEqual(len(config.kls_list.subordinate_namespace_names), 4)
        for i, (a_class_name, a_class, ns_name) in \
            enumerate(config.kls_list.class_list):
            self.assertTrue(isinstance(a_class_name, str))
            self.assertEqual(a_class_name, a_class.__name__)
            self.assertEqual(ns_name, "%s_%02d" % (a_class_name, i))
Example #4
def define_config():
    definition = Namespace()
    definition.add_option(
      name='devowel',
      default=False
    )
    return definition
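
A hedged usage sketch for the definition above (assumes configman is installed; the override dict stands in for a config file or the command line):
from configman import ConfigurationManager

cm = ConfigurationManager(
    define_config(),
    values_source_list=[{'devowel': True}],
    argv_source=[],
)
config = cm.get_config()
print(config.devowel)  # -> True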
Example #5
def main(initial_app, values_source_list=None):
    if isinstance(initial_app, str):
        initial_app = class_converter(initial_app)

    # the only config parameter is a special one that refers to a class or
    # module that defines an application.  In order to qualify, a class must
    # have a constructor that accepts a DotDict derivative as the sole
    # input parameter.  It must also have a 'main' function that accepts no
    # parameters.  For a module to be acceptable, it must have a main
    # function that accepts a DotDict derivative as its input parameter.
    app_definition = Namespace()
    app_definition.add_option(
      'application',
      doc='the fully qualified module or class of the '
          'application',
      default=initial_app,
      from_string_converter=class_converter
    )
    try:
        app_name = initial_app.app_name  # this will be used as the default
                                         # b
        app_version = initial_app.app_version
        app_description = initial_app.app_description
    except AttributeError as x:
        raise AppDetailMissingError(str(x))
Example #6
    def test_classes_in_namespaces_converter_4(self):
        n = Namespace()
        n.add_option('kls_list',
                      default='configman.tests.test_converters.Alpha, '
                              'configman.tests.test_converters.Alpha, '
                              'configman.tests.test_converters.Alpha',
                      from_string_converter=
                         converters.classes_in_namespaces_converter(
                          'kls%d',
                          'kls',
                          instantiate_classes=True))

        cm = ConfigurationManager(
          n,
          [{'kls_list':'configman.tests.test_converters.Alpha, '
                       'configman.tests.test_converters.Beta, '
                       'configman.tests.test_converters.Beta, '
                       'configman.tests.test_converters.Alpha'}])
        config = cm.get_config()

        self.assertEqual(len(config.kls_list.subordinate_namespace_names), 4)
        for x in config.kls_list.subordinate_namespace_names:
            self.assertTrue(x in config)
            self.assertTrue('kls_instance' in config[x])
            self.assertTrue(isinstance(config[x].kls_instance,
                                       config[x].kls))
Example #7
    def test_for_mapping_long_doc_in_write_conf(self):
        n = self._some_namespaces()
        n = Namespace(doc='top')
        n.add_option(
            'aaa',
            'Default Value Goes In Here',
            'This time the documentation string is really long. So long '
            'that we have to write it on multiple lines.',
        )
        cm = ConfigurationManager(
            n,
            values_source_list=[],
        )
        out = StringIO()
        cm.write_conf(for_mapping, opener=stringIO_context_wrapper(out))
        received = out.getvalue()
        out.close()
        for line in received.splitlines():
            self.assertTrue(len(line) < 80, line)
        expected = """
# This time the documentation string is really long. So long that we have to
# write it on multiple lines. (default: 'Default Value Goes In Here')
aaa='Default Value Goes In Here'
        """.strip()
        self.assertEqual(received.strip(), expected)
Example #8
    def test_basic_crashstorage(self):

        required_config = Namespace()

        mock_logging = Mock()
        required_config.add_option('logger', default=mock_logging)

        config_manager = ConfigurationManager(
          [required_config],
          app_name='testapp',
          app_version='1.0',
          app_description='app description',
          values_source_list=[{
            'logger': mock_logging,
          }]
        )

        with config_manager.context() as config:
            crashstorage = CrashStorageBase(
              config,
              quit_check_callback=fake_quit_check
            )
            crashstorage.save_raw_crash({}, 'payload', 'ooid')
            crashstorage.save_processed({})
            self.assertRaises(NotImplementedError,
                              crashstorage.get_raw_crash, 'ooid')
            self.assertRaises(NotImplementedError,
                              crashstorage.get_raw_dump, 'ooid')
            self.assertRaises(NotImplementedError,
                              crashstorage.get_processed, 'ooid')
            self.assertRaises(NotImplementedError,
                              crashstorage.remove, 'ooid')
            self.assertRaises(StopIteration, crashstorage.new_crashes)
            crashstorage.close()
Example #9
    def test_no_rollback_exception_with_postgres(self):
        required_config = Namespace()
        required_config.add_option(
          'transaction_executor_class',
          default=TransactionExecutor,
          doc='a class that will execute transactions'
        )
        mock_logging = MockLogging()
        required_config.add_option('logger', default=mock_logging)

        config_manager = ConfigurationManager(
          [required_config],
          app_name='testapp',
          app_version='1.0',
          app_description='app description',
          values_source_list=[{'database_class': MockConnectionContext}],
        )
        with config_manager.context() as config:
            executor = config.transaction_executor_class(config)

            def mock_function(connection):
                assert isinstance(connection, MockConnection)
                raise NameError('crap!')

            self.assertRaises(NameError, executor, mock_function)

            self.assertEqual(commit_count, 0)
            self.assertEqual(rollback_count, 0)
            self.assertTrue(mock_logging.errors)
Example #10
    def test_write_with_imported_module_with_regex(self):
        required_config = Namespace()
        required_config.add_option(
          'identifier',
          doc='just an identifier re',
          default=r'[a-zA-Z][a-zA-Z0-9]*',
          from_string_converter=re.compile
        )
        cm = ConfigurationManager(
            required_config,
            values_source_list=[],
        )
        config = cm.get_config()

        s = StringIO()

        @contextlib.contextmanager
        def s_opener():
            yield s

        cm.write_conf('py', s_opener)
        generated_python_module_text = s.getvalue()
        expected = """# generated Python configman file



# just an identifier re
identifier = "[a-zA-Z][a-zA-Z0-9]*"
"""
        self.assertEqual(generated_python_module_text, expected)
Example #11
    def test_classes_in_namespaces_converter_4(self):
        n = Namespace()
        n.add_option(
            "kls_list",
            default="configman.tests.test_converters.Alpha, "
            "configman.tests.test_converters.Alpha, "
            "configman.tests.test_converters.Alpha",
            from_string_converter=converters.classes_in_namespaces_converter("kls%d", "kls", instantiate_classes=True),
        )

        cm = ConfigurationManager(
            n,
            [
                {
                    "kls_list": "configman.tests.test_converters.Alpha, "
                    "configman.tests.test_converters.Beta, "
                    "configman.tests.test_converters.Beta, "
                    "configman.tests.test_converters.Alpha"
                }
            ],
        )
        config = cm.get_config()

        self.assertEqual(len(config.kls_list.subordinate_namespace_names), 4)
        for x in config.kls_list.subordinate_namespace_names:
            self.assertTrue(x in config)
            self.assertTrue("kls_instance" in config[x])
            self.assertTrue(isinstance(config[x].kls_instance, config[x].kls))
Example #12
def run(*crash_ids):

    definition_source = Namespace()
    definition_source.namespace('queuing')
    definition_source.queuing.add_option(
        'rabbitmq_reprocessing_class',
        default=SingleCrashMQCrashStorage,
    )
    config_dict = {
        'resource': {
            'rabbitmq': {
                'host': 'localhost',
                'port': '5672',
                'virtual_host': '/'
            }
        },
        'secrets': {
            'rabbitmq': {
                'rabbitmq_password': '******',
                'rabbitmq_user': '******'
            }
        }
    }
    config = configuration(
        definition_source=definition_source,
        values_source_list=[config_dict],
    )
    config.queuing.logger = logger
    config.logger = logger
    storage = SingleCrashMQCrashStorage(config=config['queuing'])
    for crash_id in crash_ids:
        print(storage.submit(crash_id))
    return 0
Example #13
    def test_basic_usage_with_postgres(self):
        required_config = Namespace()
        required_config.add_option(
          'transaction_executor_class',
          #default=TransactionExecutorWithBackoff,
          default=TransactionExecutor,
          doc='a class that will execute transactions'
        )
        required_config.add_option(
          'database_class',
          default=MockConnectionContext,
          from_string_converter=class_converter
        )

        config_manager = ConfigurationManager(
          [required_config],
          app_name='testapp',
          app_version='1.0',
          app_description='app description',
          values_source_list=[],
        )
        with config_manager.context() as config:
            mocked_context = config.database_class(config)
            executor = config.transaction_executor_class(config,
                                                         mocked_context)
            _function_calls = []  # some mutable

            def mock_function(connection):
                assert isinstance(connection, MockConnection)
                _function_calls.append(connection)

            executor(mock_function)
            self.assertTrue(_function_calls)
            self.assertEqual(commit_count, 1)
            self.assertEqual(rollback_count, 0)
Example #14
    def test_classes_in_namespaces_converter_4(self):
        n = Namespace()
        n.add_option(
            'kls_list',
            default=(
                'socorro.unittest.lib.test_converters.Alpha, '
                'socorro.unittest.lib.test_converters.Alpha, '
                'socorro.unittest.lib.test_converters.Alpha'
            ),
            from_string_converter=str_to_classes_in_namespaces_converter(
                '%(name)s_%(index)02d'
            )
        )

        cm = ConfigurationManager(
            n,
            [{
                'kls_list': (
                    'socorro.unittest.lib.test_converters.Alpha, '
                    'socorro.unittest.lib.test_converters.Beta, '
                    'socorro.unittest.lib.test_converters.Beta, '
                    'socorro.unittest.lib.test_converters.Alpha'
                ),
                'Alpha_00.a': 21,
                'Beta_01.b': 38,
            }]
        )
        config = cm.get_config()


        self.assertEqual(len(config.kls_list.subordinate_namespace_names), 4)
        for x in config.kls_list.subordinate_namespace_names:
            self.assertTrue(x in config)
        self.assertEqual(config.Alpha_00.a, 21)
        self.assertEqual(config.Beta_01.b, 38)
Example #15
def main(initial_app, values_source_list=None, config_path=None):
    if isinstance(initial_app, str):
        initial_app = class_converter(initial_app)

    if config_path is None:
        default = './config'
        config_path = os.environ.get(
            'DEFAULT_SOCORRO_CONFIG_PATH',
            default
        )
        if config_path != default:
            # you tried to set it, then it must be a valid directory
            if not os.path.isdir(config_path):
                raise IOError('%s is not a valid directory' % config_path)

    # the only config parameter is a special one that refers to a class or
    # module that defines an application.  In order to qualify, a class must
    # have a constructor that accepts a DotDict derivative as the sole
    # input parameter.  It must also have a 'main' function that accepts no
    # parameters.  For a module to be acceptable, it must have a main
    # function that accepts a DotDict derivative as its input parameter.
    app_definition = Namespace()
    app_definition.add_option(
      'application',
      doc='the fully qualified module or class of the application',
      default=initial_app,
      from_string_converter=class_converter
    )
    try:
        app_name = initial_app.app_name  # this will be used as the default
                                         # b
        app_version = initial_app.app_version
        app_description = initial_app.app_description
    except AttributeError as x:
        raise AppDetailMissingError(str(x))
Example #16
    def test_poly_crash_storage_immutability_deeper(self):
        n = Namespace()
        n.add_option(
            'storage',
            default=PolyCrashStorage,
        )
        n.add_option(
            'logger',
            default=mock.Mock(),
        )
        value = {
            'storage_classes': (
                'socorro.unittest.external.test_crashstorage_base'
                '.MutatingProcessedCrashCrashStorage'
            ),
        }
        cm = ConfigurationManager(n, values_source_list=[value])
        with cm.context() as config:
            raw_crash = {'ooid': '12345'}
            dump = '12345'
            processed_crash = {
                'foo': DotDict({'other': 'thing'}),
                'bar': SocorroDotDict({'something': 'else'}),
            }

            poly_store = config.storage(config)

            poly_store.save_raw_and_processed(
                raw_crash,
                dump,
                processed_crash,
                'n'
            )
            eq_(processed_crash['foo']['other'], 'thing')
            eq_(processed_crash['bar']['something'], 'else')
Example #17
    def test_basic_crashstorage(self):

        required_config = Namespace()

        mock_logging = Mock()
        required_config.add_option("logger", default=mock_logging)
        required_config.update(CrashStorageBase.required_config)

        config_manager = ConfigurationManager(
            [required_config],
            app_name="testapp",
            app_version="1.0",
            app_description="app description",
            values_source_list=[{"logger": mock_logging}],
            argv_source=[],
        )

        with config_manager.context() as config:
            crashstorage = CrashStorageBase(config, quit_check_callback=fake_quit_check)
            crashstorage.save_raw_crash({}, "payload", "ooid")
            crashstorage.save_processed({})
            assert_raises(NotImplementedError, crashstorage.get_raw_crash, "ooid")
            assert_raises(NotImplementedError, crashstorage.get_raw_dump, "ooid")
            assert_raises(NotImplementedError, crashstorage.get_unredacted_processed, "ooid")
            assert_raises(NotImplementedError, crashstorage.remove, "ooid")
            eq_(crashstorage.new_crashes(), [])
            crashstorage.close()
Example #18
    def test_benchmarking_crashstore(self):
        required_config = Namespace()

        mock_logging = Mock()
        required_config.add_option("logger", default=mock_logging)
        required_config.update(BenchmarkingCrashStorage.get_required_config())
        fake_crash_store = Mock()

        config_manager = ConfigurationManager(
            [required_config],
            app_name="testapp",
            app_version="1.0",
            app_description="app description",
            values_source_list=[
                {"logger": mock_logging, "wrapped_crashstore": fake_crash_store, "benchmark_tag": "test"}
            ],
            argv_source=[],
        )

        with config_manager.context() as config:
            crashstorage = BenchmarkingCrashStorage(config, quit_check_callback=fake_quit_check)
            crashstorage.start_timer = lambda: 0
            crashstorage.end_timer = lambda: 1
            fake_crash_store.assert_called_with(config, fake_quit_check)

            crashstorage.save_raw_crash({}, "payload", "ooid")
            crashstorage.wrapped_crashstore.save_raw_crash.assert_called_with({}, "payload", "ooid")
            mock_logging.debug.assert_called_with("%s save_raw_crash %s", "test", 1)
            mock_logging.debug.reset_mock()

            crashstorage.save_processed({})
            crashstorage.wrapped_crashstore.save_processed.assert_called_with({})
            mock_logging.debug.assert_called_with("%s save_processed %s", "test", 1)
            mock_logging.debug.reset_mock()

            crashstorage.get_raw_crash("uuid")
            crashstorage.wrapped_crashstore.get_raw_crash.assert_called_with("uuid")
            mock_logging.debug.assert_called_with("%s get_raw_crash %s", "test", 1)
            mock_logging.debug.reset_mock()

            crashstorage.get_raw_dump("uuid")
            crashstorage.wrapped_crashstore.get_raw_dump.assert_called_with("uuid")
            mock_logging.debug.assert_called_with("%s get_raw_dump %s", "test", 1)
            mock_logging.debug.reset_mock()

            crashstorage.get_raw_dumps("uuid")
            crashstorage.wrapped_crashstore.get_raw_dumps.assert_called_with("uuid")
            mock_logging.debug.assert_called_with("%s get_raw_dumps %s", "test", 1)
            mock_logging.debug.reset_mock()

            crashstorage.get_raw_dumps_as_files("uuid")
            crashstorage.wrapped_crashstore.get_raw_dumps_as_files.assert_called_with("uuid")
            mock_logging.debug.assert_called_with("%s get_raw_dumps_as_files %s", "test", 1)
            mock_logging.debug.reset_mock()

            crashstorage.get_unredacted_processed("uuid")
            crashstorage.wrapped_crashstore.get_unredacted_processed.assert_called_with("uuid")
            mock_logging.debug.assert_called_with("%s get_unredacted_processed %s", "test", 1)
            mock_logging.debug.reset_mock()
Example #19
def main(app_object=None):
    if isinstance(app_object, six.string_types):
        app_object = class_converter(app_object)

    # the only config parameter is a special one that refers to a class or
    # module that defines an application.  In order to qualify, a class must
    # have a constructor that accepts a DotDict derivative as the sole
    # input parameter.  It must also have a 'main' function that accepts no
    # parameters.  For a module to be acceptable, it must have a main
    # function that accepts a DotDict derivative as its input parameter.
    app_definition = Namespace()
    app_definition.add_option('application',
                              doc='the fully qualified module or class of the '
                                  'application',
                              default=app_object,
                              from_string_converter=class_converter
                             )
    app_name = getattr(app_object, 'app_name', 'unknown')
    app_version = getattr(app_object, 'app_version', '0.0')
    app_description = getattr(app_object, 'app_description', 'no idea')


    # create an iterable collection of value sources
    # the order is important as these will supply values for the options
    # defined in app_definition.  The values will be overlain in turn.
    # First the os.environ values will be applied.  Then any values from an ini
    # file parsed by getopt.  Finally any values supplied on the command line
    # will be applied.
    value_sources = (ConfigFileFutureProxy,  # alias for allowing the user
                                             # to specify a config file on
                                             # the command line
                     environment,  # alias for os.environ
                     command_line) # alias for getopt

    # set up the manager with the definitions and values
    # it isn't necessary to provide the app_name because the
    # app_object passed in or loaded by the ConfigurationManager will already
    # have that information.
    config_manager = ConfigurationManager(app_definition,
                                          value_sources,
                                          app_name=app_name,
                                          app_version=app_version,
                                          app_description=app_description,
                                         )
    config = config_manager.get_config()

    app_object = config.admin.application

    if isinstance(app_object, type):
        # invocation of the app if the app_object was a class
        instance = app_object(config)
        instance.main()
    elif inspect.ismodule(app_object):
        # invocation of the app if the app_object was a module
        app_object.main(config)
    elif inspect.isfunction(app_object):
        # invocation of the app if the app_object was a function
        app_object(config)
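
For reference, a hedged sketch of an application class that satisfies the contract described in the comments above (all names are illustrative):
class ExampleApp:
    app_name = 'example'
    app_version = '1.0'
    app_description = 'a minimal configman-driven application'

    def __init__(self, config):
        # config is the DotDict derivative built by the ConfigurationManager
        self.config = config

    def main(self):
        # takes no parameters, as required by main() above
        print('running', self.config.application)

# main(ExampleApp) would then build the config and call ExampleApp(config).main()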
Example #20
    def test_operation_error_with_postgres_with_backoff_with_rollback(self):
        required_config = Namespace()
        required_config.add_option(
          'transaction_executor_class',
          default=TransactionExecutorWithBackoff,
          #default=TransactionExecutor,
          doc='a class that will execute transactions'
        )

        mock_logging = MockLogging()
        required_config.add_option('logger', default=mock_logging)

        config_manager = ConfigurationManager(
          [required_config],
          app_name='testapp',
          app_version='1.0',
          app_description='app description',
          values_source_list=[{'database_class': MockConnectionContext,
                               'backoff_delays': [2, 4, 6, 10, 15]}],
        )
        with config_manager.context() as config:
            executor = config.transaction_executor_class(config)
            _function_calls = []  # some mutable

            _sleep_count = []

            def mock_function(connection):
                assert isinstance(connection, MockConnection)
                connection.transaction_status = \
                  psycopg2.extensions.TRANSACTION_STATUS_INTRANS
                _function_calls.append(connection)
                # the default sleep times are going to be,
                # 2, 4, 6, 10, 15
                # so after 2 + 4 + 6 + 10 + 15 seconds
                # all will be exhausted
                if sum(_sleep_count) < sum([2, 4, 6, 10, 15]):
                    raise psycopg2.OperationalError('Arh!')

            def mock_sleep(n):
                _sleep_count.append(n)

            # monkey patch the sleep function from inside transaction_executor
            _orig_sleep = socorro.database.transaction_executor.time.sleep
            socorro.database.transaction_executor.time.sleep = mock_sleep

            try:
                executor(mock_function)
                self.assertTrue(_function_calls)
                self.assertEqual(commit_count, 1)
                self.assertEqual(rollback_count, 5)
                self.assertTrue(mock_logging.warnings)
                self.assertEqual(len(mock_logging.warnings), 5)
                self.assertTrue(len(_sleep_count) > 10)
            finally:
                socorro.database.transaction_executor.time.sleep = _orig_sleep
Example #21
 def setup_configman_namespace(self):
     n = Namespace()
     n.add_option(
         'alpha',
         default=3,
         doc='the first parameter',
         is_argument=True
     )
     n.add_option(
         'beta',
         default='the second',
         doc='the second parameter',
         short_form='b',
     )
     n.add_option(
         'gamma',
         default="1 2 3",
         from_string_converter=quote_stripping_list_of_ints,
         to_string_converter=partial(
             list_to_str,
             delimiter=' '
         ),
         secret=True,
     )
     n.add_option(
         'delta',
         default=False,
         from_string_converter=boolean_converter
     )
     return n
Example #22
def get_standard_config_manager(
    more_definitions=None,
    overrides=None,
):
    # MOCKED CONFIG DONE HERE
    required_config = Namespace()
    required_config.add_option(
        'logger',
        default=SilentFakeLogger(),
        doc='a logger',
    )
    required_config.add_option(
        'executor_identity',
        default=Mock()
    )

    if isinstance(more_definitions, Sequence):
        definitions = [required_config]
        definitions.extend(more_definitions)
    elif more_definitions is not None:
        definitions = [required_config, more_definitions]
    else:
        definitions = [required_config]

    local_overrides = [
        environment,
    ]

    if isinstance(overrides, Sequence):
        overrides.extend(local_overrides)
    elif overrides is not None:
        overrides = [overrides] + local_overrides
    else:
        overrides = local_overrides

    config_manager = ConfigurationManager(
        definitions,
        values_source_list=overrides,
        app_name='test-crontabber',
        app_description=__doc__,
        argv_source=[]
    )

    # very useful debug
    #import contextlib
    #import sys
    #@contextlib.contextmanager
    #def stdout_opener():
        #yield sys.stdout
    #config_manager.write_conf('conf', stdout_opener)

    return config_manager
Example #23
def logging_required_config(app_name):
    lc = Namespace()
    lc.namespace('logging')
    lc.logging.add_option(
      'syslog_host',
      doc='syslog hostname',
      default='localhost',
      reference_value_from='resource.logging',
    )
    lc.logging.add_option(
      'syslog_port',
      doc='syslog port',
      default=514,
      reference_value_from='resource.logging',
    )
    lc.logging.add_option(
      'syslog_facility_string',
      doc='syslog facility string ("user", "local0", etc)',
      default='user',
      reference_value_from='resource.logging',
    )
    lc.logging.add_option(
      'syslog_line_format_string',
      doc='python logging system format for syslog entries',
      default='%s (pid {process}): '
              '{asctime} {levelname} - {threadName} - '
              '{message}' % app_name,
      reference_value_from='resource.logging',
    )
    lc.logging.add_option(
      'syslog_error_logging_level',
      doc='logging level for the log file (10 - DEBUG, 20 '
          '- INFO, 30 - WARNING, 40 - ERROR, 50 - CRITICAL)',
      default=40,
      reference_value_from='resource.logging',
    )
    lc.logging.add_option(
      'stderr_line_format_string',
      doc='python logging system format for logging to stderr',
      default='{asctime} {levelname} - {threadName} - '
              '{message}',
      reference_value_from='resource.logging',
    )
    lc.logging.add_option(
      'stderr_error_logging_level',
      doc='logging level for the logging to stderr (10 - '
          'DEBUG, 20 - INFO, 30 - WARNING, 40 - ERROR, '
          '50 - CRITICAL)',
      default=10,
      reference_value_from='resource.logging',
    )
    return lc
Example #24
def define_config():
    definition = Namespace()
    definition.add_option(
        name='redmine-root',
        doc='Root url of redmine server',
        short_form='r'
    )
    definition.add_option(
        name='redmine-apikey',
        doc='Redmine API key',
        short_form='a'
    )
    return definition
Example #25
 def _common_config_setup(self):
     mock_logging = Mock()
     required_config = Namespace()
     required_config.namespace("hbase")
     required_config.hbase.hbase_class = crashstorage.HBaseCrashStorage
     required_config.hbase.add_option("logger", default=mock_logging)
     config_manager = ConfigurationManager(
         [required_config],
         app_name="testapp",
         app_version="1.0",
         app_description="app description",
         values_source_list=[{"hbase": {"logger": mock_logging}}],
     )
     return config_manager
Example #26
def _get_config_manager():
    required_config = Namespace()

    webapi = Namespace()
    webapi.search_default_date_range = 7
    webapi.search_maximum_date_range = 365

    required_config.webapi = webapi

    config_manager = ConfigurationManager(
        [required_config], app_name="testapp", app_version="1.0", app_description="app description", argv_source=[]
    )

    return config_manager
Example #27
 def _common_config_setup(self):
     mock_logging = Mock()
     required_config = Namespace()
     required_config.namespace("filesystem")
     required_config.filesystem.filesystem_class = crashstorage.FSRadixTreeStorage
     required_config.filesystem.add_option("logger", default=mock_logging)
     config_manager = ConfigurationManager(
         [required_config],
         app_name="testapp",
         app_version="1.0",
         app_description="app description",
         values_source_list=[{"filesystem": {"logger": mock_logging, "fs_root": self.fs_root}}],
     )
     return config_manager
Example #28
def _get_config_manager():
    required_config = Namespace()

    required_config.search_default_date_range = 7
    required_config.search_maximum_date_range = 365

    config_manager = ConfigurationManager(
        [required_config],
        app_name='testapp',
        app_version='1.0',
        app_description='app description',
        argv_source=[]
    )

    return config_manager
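
In tests, the manager above is typically used as a context manager; a hedged sketch (assuming values assigned directly on the Namespace become plain defaults):
config_manager = _get_config_manager()
with config_manager.context() as config:
    assert config.search_default_date_range == 7
    assert config.search_maximum_date_range == 365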
Example #29
def define_config():
    definition = Namespace()
    definition.add_option(
      name='devowel',
      default=False,
      doc='Removes all vowels (including Y)',
      short_form='d'
    )
    definition.add_option(
      name='file',
      default='',
      doc='file name for the input text',
      short_form='f'
    )
    return definition
Example #30
def _get_config_manager():
    required_config = Namespace()

    webapi = Namespace()
    webapi.search_default_date_range = 7

    required_config.webapi = webapi

    config_manager = ConfigurationManager(
        [required_config],
        app_name='testapp',
        app_version='1.0',
        app_description='app description',
    )

    return config_manager
Example #31
class IndexCleaner(RequiredConfig):
    """Delete Elasticsearch indices from our databases."""

    required_config = Namespace()
    required_config.add_option(
        'retention_policy',
        default=26,
        doc='Number of weeks to keep an index alive. ',
    )
    required_config.namespace('elasticsearch')
    required_config.elasticsearch.add_option(
        'elasticsearch_class',
        default='socorro.external.es.connection_context.ConnectionContext',
        from_string_converter=class_converter,
        reference_value_from='resource.elasticsearch',
    )
    required_config.elasticsearch.add_option(
        'elasticsearch_index_regex',
        default='^socorro[0-9]{6}$',
        reference_value_from='resource.elasticsearch',
    )

    def __init__(self, config):
        super().__init__()
        self.config = config

    def delete_indices(self, predicate=None):
        """Delete crash indices that match the given predicate.

        :arg callable predicate: A callable of the form
            ``predicate(index)``, where ``index`` is a string containing
            the name of the index. If the callable returns true, the
            index will be deleted.

            The default is None, which deletes all crash indices.
        :returns: List of indexes that were deleted

        """
        es_class = self.config.elasticsearch.elasticsearch_class(
            self.config.elasticsearch)
        index_client = es_class.indices_client()

        status = index_client.status()
        indices = status['indices'].keys()

        aliases = index_client.get_aliases()

        deleted_indices = []
        for index in indices:
            # Some indices look like 'socorro%Y%W_%Y%M%d', but they are
            # aliased to the expected format of 'socorro%Y%W'. In such cases,
            # replace the index with the alias.
            if index in aliases and 'aliases' in aliases[index]:
                index_aliases = list(aliases[index]['aliases'].keys())
                if index_aliases:
                    index = index_aliases[0]

            if not re.match(
                    self.config.elasticsearch.elasticsearch_index_regex,
                    index):
                # This index doesn't look like a crash index, let's skip it.
                continue

            if predicate is None or predicate(index):
                index_client.delete(index)
                deleted_indices.append(index)

        return deleted_indices

    def delete_old_indices(self):
        self.delete_indices(self.is_index_old)

    def is_index_old(self, index):
        now = utc_now()
        policy_delay = datetime.timedelta(weeks=self.config.retention_policy)
        time_limit = (now - policy_delay).replace(tzinfo=None)

        # strptime ignores week numbers if a day isn't specified, so we append
        # '-1' and '-%w' to specify Monday as the day.
        index_date = datetime.datetime.strptime(
            index + '-1',
            self.config.elasticsearch.elasticsearch_index + '-%w')

        return index_date < time_limit
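
A small illustration of the strptime trick used in is_index_old (the index name is made up, following the 'socorro%Y%W' pattern implied by the regex above):
import datetime

index = 'socorro202052'
# appending '-1' / '-%w' pins the weekday to Monday so the week number is honored
index_date = datetime.datetime.strptime(index + '-1', 'socorro%Y%W' + '-%w')
print(index_date)  # Monday of week 52 of 2020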
Example #32
    def test_fallback_crash_storage(self):
        n = Namespace()
        n.add_option(
            'storage',
            default=FallbackCrashStorage,
        )
        n.add_option(
            'logger',
            default=mock.Mock(),
        )
        value = {
            'primary.storage_class':
            'socorro.unittest.external.test_crashstorage_base.A',
            'fallback.storage_class':
            'socorro.unittest.external.test_crashstorage_base.B',
        }
        cm = ConfigurationManager(n, values_source_list=[value])
        with cm.context() as config:
            self.assertEqual(config.primary.storage_class.foo, 'a')
            self.assertEqual(config.fallback.storage_class.foo, 'b')

            raw_crash = {'ooid': ''}
            crash_id = '1498dee9-9a45-45cc-8ec8-71bb62121203'
            dump = '12345'
            processed_crash = {'ooid': '', 'product': 17}
            fb_store = config.storage(config)

            # save_raw tests
            fb_store.primary_store.save_raw_crash = Mock()
            fb_store.fallback_store.save_raw_crash = Mock()
            fb_store.save_raw_crash(raw_crash, dump, crash_id)
            fb_store.primary_store.save_raw_crash.assert_called_with(
                raw_crash, dump, crash_id)
            self.assertEqual(fb_store.fallback_store.save_raw_crash.call_count,
                             0)

            fb_store.primary_store.save_raw_crash = Mock()
            fb_store.primary_store.save_raw_crash.side_effect = Exception('!')
            fb_store.save_raw_crash(raw_crash, dump, crash_id)
            fb_store.primary_store.save_raw_crash.assert_called_with(
                raw_crash, dump, crash_id)
            fb_store.fallback_store.save_raw_crash.assert_called_with(
                raw_crash, dump, crash_id)

            fb_store.fallback_store.save_raw_crash = Mock()
            fb_store.fallback_store.save_raw_crash.side_effect = Exception('!')
            self.assertRaises(PolyStorageError, fb_store.save_raw_crash,
                              raw_crash, dump, crash_id)
            fb_store.primary_store.save_raw_crash.assert_called_with(
                raw_crash, dump, crash_id)
            fb_store.fallback_store.save_raw_crash.assert_called_with(
                raw_crash, dump, crash_id)

            # save_processed tests
            fb_store.primary_store.save_processed = Mock()
            fb_store.fallback_store.save_processed = Mock()
            fb_store.save_processed(processed_crash)
            fb_store.primary_store.save_processed.assert_called_with(
                processed_crash)
            self.assertEqual(fb_store.fallback_store.save_processed.call_count,
                             0)

            fb_store.primary_store.save_processed = Mock()
            fb_store.primary_store.save_processed.side_effect = Exception('!')
            fb_store.save_processed(processed_crash)
            fb_store.primary_store.save_processed.assert_called_with(
                processed_crash)
            fb_store.fallback_store.save_processed.assert_called_with(
                processed_crash)

            fb_store.fallback_store.save_processed = Mock()
            fb_store.fallback_store.save_processed.side_effect = Exception('!')
            self.assertRaises(PolyStorageError, fb_store.save_processed,
                              processed_crash)
            fb_store.primary_store.save_processed.assert_called_with(
                processed_crash)
            fb_store.fallback_store.save_processed.assert_called_with(
                processed_crash)

            # close tests
            fb_store.primary_store.close = Mock()
            fb_store.fallback_store.close = Mock()
            fb_store.close()
            fb_store.primary_store.close.assert_called_with()
            fb_store.fallback_store.close.assert_called_with()

            fb_store.primary_store.close = Mock()
            fb_store.fallback_store.close = Mock()
            fb_store.fallback_store.close.side_effect = NotImplementedError()
            fb_store.close()
            fb_store.primary_store.close.assert_called_with()
            fb_store.fallback_store.close.assert_called_with()

            fb_store.primary_store.close = Mock()
            fb_store.primary_store.close.side_effect = Exception('!')
            fb_store.close()
            fb_store.primary_store.close.assert_called_with()
            fb_store.fallback_store.close.assert_called_with()

            fb_store.fallback_store.close = Mock()
            fb_store.fallback_store.close.side_effect = Exception('!')
            self.assertRaises(PolyStorageError, fb_store.close)
            fb_store.primary_store.close.assert_called_with()
            fb_store.fallback_store.close.assert_called_with()
Example #33
class BugzillaCronApp(PostgresTransactionManagedCronApp):
    app_name = 'bugzilla-associations'
    app_description = 'Bugzilla Associations'
    app_version = '0.1'

    required_config = Namespace()
    required_config.add_option('query',
                               default=_URL,
                               doc='Explanation of the option')

    required_config.add_option(
        'days_into_past',
        default=0,
        doc='number of days to look into the past for bugs (0 - use last '
        'run time)')

    def run(self, connection):
        # record_associations
        logger = self.config.logger

        try:
            # KeyError if it's never run successfully
            # TypeError if self.job_information is None
            last_run = self.job_information['last_success']
        except (KeyError, TypeError):
            last_run = utc_now() - datetime.timedelta(
                days=self.config.days_into_past and self.config.days_into_past
                or 30)
        last_run_formatted = last_run.strftime('%Y-%m-%d')
        query = self.config.query % last_run_formatted
        cursor = connection.cursor()
        for bug_id, status, resolution, short_desc, signature_set in self._iterator(
                query):
            logger.debug("bug %s (%s, %s) %s: %s", bug_id, status, resolution,
                         short_desc, signature_set)
            if not signature_set:
                cursor.execute(
                    """
                DELETE FROM bugs WHERE id = %s
                """, (bug_id, ))
                continue
            useful = False
            insert_made = False
            try:
                status_db, resolution_db, short_desc_db = singleRowSql(
                    cursor, """
                    SELECT status, resolution, short_desc
                    FROM bugs
                    WHERE id = %s
                    """, (bug_id, ))
                if status_db != status or resolution_db != resolution or short_desc_db != short_desc:
                    cursor.execute(
                        """
                      UPDATE bugs SET
                        status = %s, resolution = %s, short_desc = %s
                      WHERE id = %s""",
                        (status, resolution, short_desc, bug_id))
                    logger.info("bug status updated: %s - %s, %s", bug_id,
                                status, resolution)
                    useful = True

                cursor.execute(
                    """
                    SELECT signature FROM bug_associations WHERE bug_id = %s""",
                    (bug_id, ))
                signatures_db = [x[0] for x in cursor.fetchall()]

                for signature in signatures_db:
                    if signature not in signature_set:
                        cursor.execute(
                            """
                        DELETE FROM bug_associations
                        WHERE signature = %s and bug_id = %s""",
                            (signature, bug_id))
                        logger.info('association removed: %s - "%s"', bug_id,
                                    signature)
                        useful = True
            except SQLDidNotReturnSingleRow:
                cursor.execute(
                    """
                  INSERT INTO bugs
                  (id, status, resolution, short_desc)
                  VALUES (%s, %s, %s, %s)""",
                    (bug_id, status, resolution, short_desc))
                insert_made = True
                signatures_db = []

            for signature in signature_set:
                if signature not in signatures_db:
                    if self._has_signature_report(signature, cursor):
                        cursor.execute(
                            """
                          INSERT INTO bug_associations (signature, bug_id)
                          VALUES (%s, %s)""", (signature, bug_id))
                        logger.info('new association: %s - "%s"', bug_id,
                                    signature)
                        useful = True
                    else:
                        logger.info(
                            'rejecting association (no reports with this '
                            'signature): %s - "%s"', bug_id, signature)

            if useful:
                connection.commit()
                if insert_made:
                    logger.info('new bug: %s - %s, %s, "%s"', bug_id, status,
                                resolution, short_desc)
            else:
                connection.rollback()
                if insert_made:
                    logger.info(
                        'rejecting bug (no useful information): '
                        '%s - %s, %s, "%s"', bug_id, status, resolution,
                        short_desc)
                else:
                    logger.info(
                        'skipping bug (no new information): '
                        '%s - %s, %s, "%s"', bug_id, status, resolution,
                        short_desc)

    def _iterator(self, query):
        ##assert query.startswith('file://'), query## DEBUGGGINGG
        opener = urllib2.urlopen
        for report in csv.DictReader(opener(query)):
            yield (int(report['bug_id']), report['bug_status'],
                   report['resolution'], report['short_desc'],
                   self._signatures_found(report['cf_crash_signature']))

    def _signatures_found(self, signature):
        if not signature:
            return set()
        set_ = set()
        try:
            start = 0
            end = 0
            while True:
                start = signature.index("[@", end) + 2
                end = signature.index("]", end + 1)
                set_.add(signature[start:end].strip())
        except ValueError:
            # ValueError is raised when index() finds no further signature; stop
            pass
        return set_

    def _has_signature_report(self, signature, cursor):
        try:
            singleRowSql(
                cursor, """
                SELECT 1 FROM reports
                WHERE signature = %s LIMIT 1""", (signature, ))
            return True
        except SQLDidNotReturnSingleRow:
            return False
Example #34
class CSignatureTool(CSignatureToolBase):
    """This is a C/C++ signature generation class that gets its initialization
    from configuration."""

    required_config = Namespace()
    required_config.add_option(
        'signature_sentinels',
        doc='a list of frame signatures that should always be considered top '
        'of the stack if present in the stack',
        default="""['_purecall',
               ('mozilla::ipc::RPCChannel::Call(IPC::Message*, IPC::Message*)',
                lambda x: 'CrashReporter::CreatePairedMinidumps(void*, '
                  'unsigned long, nsAString_internal*, nsILocalFile**, '
                  'nsILocalFile**)' in x
               ),
               'Java_org_mozilla_gecko_GeckoAppShell_reportJavaCrash',
               'google_breakpad::ExceptionHandler::HandleInvalidParameter'
                  '(wchar_t const*, wchar_t const*, wchar_t const*, unsigned '
                  'int, unsigned int)'
              ]""",
        from_string_converter=eval)
    required_config.add_option(
        'irrelevant_signature_re',
        doc='a regular expression matching frame signatures that should be '
        'ignored when generating an overall signature',
        default="""'|'.join([
          '@0x[0-9a-fA-F]{2,}',
          '@0x[1-9a-fA-F]',
          'ashmem',
          'app_process@0x.*',
          'core\.odex@0x.*',
          '_CxxThrowException',
          'dalvik-heap',
          'dalvik-jit-code-cache',
          'dalvik-LinearAlloc',
          'dalvik-mark-stack',
          'data@app@org\.mozilla\.f.*-\d\.apk@classes\.dex@0x.*',
          'framework\.odex@0x.*',
          'google_breakpad::ExceptionHandler::HandleInvalidParameter.*',
          'KiFastSystemCallRet',
          'libandroid_runtime\.so@0x.*',
          'libbinder\.so@0x.*',
          'libc\.so@.*',
          'libc-2\.5\.so@.*',
          'libEGL\.so@.*',
          'libdvm\.so\s*@\s*0x.*',
          'libgui\.so@0x.*',
          'libicudata.so@.*',
          'libMali\.so@0x.*',
          'libutils\.so@0x.*',
          'libz\.so@0x.*',
          'linux-gate\.so@0x.*',
          'mnt@asec@org\.mozilla\.f.*-\d@pkg\.apk@classes\.dex@0x.*',
          'MOZ_Assert',
          'MOZ_Crash',
          'mozcrt19.dll@0x.*',
          'mozilla::ipc::RPCChannel::Call\(IPC::Message\*, IPC::Message\*\)',
          '_NSRaiseError',
          '(Nt|Zw)WaitForSingleObject(Ex)?',
          '(Nt|Zw)WaitForMultipleObjects(Ex)?',
          'nvmap@0x.*',
          'org\.mozilla\.f.*-\d\.apk@0x.*',
          'RaiseException',
          'RtlpAdjustHeapLookasideDepth',
          'system@framework@.*\.jar@classes\.dex@0x.*',
          '___TERMINATING_DUE_TO_UNCAUGHT_EXCEPTION___',
          'WaitForSingleObjectExImplementation',
          'WaitForMultipleObjectsExImplementation',
          'RealMsgWaitFor.*',
          '_ZdlPv',
          'zero',
          ])""",
        from_string_converter=eval)
    required_config.add_option(
        'prefix_signature_re',
        doc='a regular expression matching frame signatures that should always '
        'be coupled with the following frame signature when generating an '
        'overall signature',
        default="""'|'.join([
          '@0x0',
          '.*CrashAtUnhandlableOOM',
          'Abort',
          '.*abort',
          '_alloca_probe.*',
          '__android_log_assert',
          'arena_.*',
          'BaseGetNamedObjectDirectory',
          '.*calloc',
          'cert_.*',
          'CERT_.*',
          'CFRelease',
          '_chkstk',
          'CrashInJS',
          '__delayLoadHelper2',
          'dlmalloc',
          'dlmalloc_trim',
          'dvm.*',
          'EtwEventEnabled',
          'extent_.*',
          'fastcopy_I',
          'fastzero_I',
          '_files_getaddrinfo',
          '.*free',
          'GCGraphBuilder::NoteXPCOMChild',
          'getanswer',
          'huge_dalloc',
          'ialloc',
          'imalloc',
          'init_library',
          'isalloc',
          'je_malloc',
          'jemalloc_crash',
          'je_realloc',
          'JNI_CreateJavaVM',
          '_JNIEnv.*',
          'JNI_GetCreatedJavaVM.*',
          'js::AutoCompartment::AutoCompartment.*',
          'JSAutoCompartment::JSAutoCompartment.*',
          'JS_DHashTableEnumerate',
          'JS_DHashTableOperate',
          'kill',
          '__libc_android_abort',
          'libobjc.A.dylib@0x1568.',
          '(libxul\.so|xul\.dll|XUL)@0x.*',
          'LL_.*',
          'malloc',
          '_MD_.*',
          'memcmp',
          '__memcmp16',
          'memcpy',
          'memmove',
          'memset',
          'mozalloc_abort.*',
          'mozalloc_handle_oom',
          'moz_free',
          'mozilla::AndroidBridge::AutoLocalJNIFrame::~AutoLocalJNIFrame',
          'mozilla::ipc::RPCChannel::Call',
          'mozilla::ipc::RPCChannel::CxxStackFrame::CxxStackFrame',
          'mozilla::ipc::RPCChannel::EnteredCxxStack',
          'mozilla::ipc::RPCChannel::Send',
          'mozilla.*FatalError',
          'moz_xmalloc',
          'moz_xrealloc',
          'NP_Shutdown',
          'nsACString_internal::Assign.*',
          'nsCOMPtr.*',
          'NS_ABORT_OOM.*',
          'NS_DebugBreak.*',
          '[-+]\[NSException raise(:format:(arguments:)?)?\]',
          'nsObjCExceptionLogAbort(\(.*?\)){0,1}',
          'nsRefPtr.*',
          'NSS.*',
          'nss.*',
          'nsTArray<.*',
          'nsTArray_base<.*',
          'NtUser.*',
          'objc_exception_throw',
          'objc_msgSend',
          'operator new\([^,\)]+\)',
          'PL_.*',
          'port_.*',
          'PORT_.*',
          '_PR_.*',
          'PR_.*',
          'pthread_mutex_lock',
          '_purecall',
          'raise',
          'realloc',
          'recv',
          '_RTC_Terminate',
          'Rtl.*',
          '_Rtl.*',
          '__Rtl.*',
          'SEC_.*Item',
          'seckey_.*',
          'SECKEY_.*',
          '__security_check_cookie',
          'send',
          'setjmp',
          'sigblock',
          'sigprocmask',
          'SocketAccept',
          'SocketAcceptRead',
          'SocketAvailable',
          'SocketAvailable64',
          'SocketBind',
          'SocketClose',
          'SocketConnect',
          'SocketGetName',
          'SocketGetPeerName',
          'SocketListen',
          'SocketPoll',
          'SocketRead',
          'SocketRecv',
          'SocketSend',
          'SocketShutdown',
          'SocketSync',
          'SocketTransmitFile',
          'SocketWrite',
          'SocketWritev',
          'ssl_.*',
          'SSL_.*',
          'strcat',
          'ssl3_.*',
          'strchr',
          'strcmp',
          'strcpy',
          '.*strdup',
          'strlen',
          'strncpy',
          'strzcmp16',
          'strstr',
          '__swrite',
          'TouchBadMemory',
          '_VEC_memcpy',
          '_VEC_memzero',
          '.*WaitFor.*',
          'wcslen',
          '__wrap_realloc',
          'WSARecv.*',
          'WSASend.*',
          '_ZdaPvRKSt9nothrow_t\"',
          'zzz_AsmCodeRange_.*',
          '.*DebugAbort.*',
          'mozilla::ipc::MessageChannel::~MessageChannel.*',
        ])""",
        from_string_converter=eval)
    required_config.add_option(
        'signatures_with_line_numbers_re',
        doc='any signatures that match this list should be combined with their '
        'associated source code line numbers',
        default='js_Interpret')

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        super(CSignatureTool, self).__init__(config, quit_check_callback)
        self.irrelevant_signature_re = \
             re.compile(self.config.irrelevant_signature_re)
        self.prefix_signature_re =  \
            re.compile(self.config.prefix_signature_re)
        self.signatures_with_line_numbers_re = \
            re.compile(self.config.signatures_with_line_numbers_re)
        self.signature_sentinels = config.signature_sentinels
Example #35
    def test_processed_crash_storage(self):
        n = Namespace()
        n.add_option(
            'storage',
            default=PrimaryDeferredProcessedStorage,
        )
        n.add_option(
            'logger',
            default=mock.Mock(),
        )
        value = {
            'primary.storage_class':
            'socorro.unittest.external.test_crashstorage_base.A',
            'deferred.storage_class':
            'socorro.unittest.external.test_crashstorage_base.B',
            'processed.storage_class':
            'socorro.unittest.external.test_crashstorage_base.B',
            'deferral_criteria': lambda x: x.get('foo') == 'foo'
        }
        cm = ConfigurationManager(n, values_source_list=[value])
        with cm.context() as config:
            self.assertEqual(config.primary.storage_class.foo, 'a')
            self.assertEqual(config.deferred.storage_class.foo, 'b')
            self.assertEqual(config.processed.storage_class.foo, 'b')

            raw_crash = {'ooid': ''}
            crash_id = '1498dee9-9a45-45cc-8ec8-71bb62121203'
            dump = '12345'
            deferred_crash = {'ooid': '', 'foo': 'foo'}
            processed_crash = {'ooid': '', 'product': 17}
            pd_store = config.storage(config)

            # save_raw tests
            pd_store.primary_store.save_raw_crash = Mock()
            pd_store.deferred_store.save_raw_crash = Mock()
            pd_store.processed_store.save_raw_crash = Mock()
            pd_store.save_raw_crash(raw_crash, dump, crash_id)
            pd_store.primary_store.save_raw_crash.assert_called_with(
                raw_crash, dump, crash_id)
            self.assertEqual(pd_store.deferred_store.save_raw_crash.call_count,
                             0)

            pd_store.save_raw_crash(deferred_crash, dump, crash_id)
            pd_store.deferred_store.save_raw_crash.assert_called_with(
                deferred_crash, dump, crash_id)

            # save_processed tests
            pd_store.primary_store.save_processed = Mock()
            pd_store.deferred_store.save_processed = Mock()
            pd_store.processed_store.save_processed = Mock()
            pd_store.save_processed(processed_crash)
            pd_store.processed_store.save_processed.assert_called_with(
                processed_crash)
            self.assertEqual(pd_store.primary_store.save_processed.call_count,
                             0)

            pd_store.save_processed(deferred_crash)
            pd_store.processed_store.save_processed.assert_called_with(
                deferred_crash)

            # close tests
            pd_store.primary_store.close = Mock()
            pd_store.deferred_store.close = Mock()
            pd_store.close()
            pd_store.primary_store.close.assert_called_with()
            pd_store.deferred_store.close.assert_called_with()

            pd_store.primary_store.close = Mock()
            pd_store.deferred_store.close = Mock()
            pd_store.deferred_store.close.side_effect = NotImplementedError()
            pd_store.close()
            pd_store.primary_store.close.assert_called_with()
            pd_store.deferred_store.close.assert_called_with()

            pd_store.primary_store.close = Mock()
            pd_store.primary_store.close.side_effect = Exception('!')
            pd_store.close()
            pd_store.primary_store.close.assert_called_with()
            pd_store.deferred_store.close.assert_called_with()

            pd_store.deferred_store.close = Mock()
            pd_store.deferred_store.close.side_effect = Exception('!')
            self.assertRaises(PolyStorageError, pd_store.close)
            pd_store.primary_store.close.assert_called_with()
            pd_store.deferred_store.close.assert_called_with()
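
The routing this test exercises can be pictured with a small stand-in; TinyDeferredRouter and its list-backed stores are invented for illustration, while the real PrimaryDeferredProcessedStorage delegates to full crash storage objects:

class TinyDeferredRouter(object):
    def __init__(self, primary, deferred, deferral_criteria):
        self.primary = primary
        self.deferred = deferred
        self.deferral_criteria = deferral_criteria

    def save_raw_crash(self, raw_crash, dump, crash_id):
        # crashes matching the criteria go to deferred storage, the rest to primary
        target = self.deferred if self.deferral_criteria(raw_crash) else self.primary
        target.append((crash_id, raw_crash, dump))

primary, deferred = [], []
router = TinyDeferredRouter(primary, deferred, lambda x: x.get('foo') == 'foo')
router.save_raw_crash({'ooid': ''}, '12345', 'crash-1')
router.save_raw_crash({'ooid': '', 'foo': 'foo'}, '12345', 'crash-2')
assert len(primary) == 1 and len(deferred) == 1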
Beispiel #36
0
class ElasticSearchCrashStorage(CrashStorageBase):
    """This class sends processed crash reports to elasticsearch. It handles
    indices creation and type mapping. It cannot store raw dumps or raw crash
    reports as Socorro doesn't need those in elasticsearch at the moment.
    """

    required_config = Namespace()
    required_config.add_option(
        'transaction_executor_class',
        default="socorro.database.transaction_executor."
        "TransactionExecutorWithLimitedBackoff",
        doc='a class that will manage transactions',
        from_string_converter=class_converter,
        reference_value_from='resource.elasticsearch',
    )
    required_config.add_option(
        'elasticsearch_class',
        default='socorro.external.elasticsearch.connection_context.'
        'ConnectionContext',
        from_string_converter=class_converter,
        reference_value_from='resource.elasticsearch',
    )
    required_config.add_option(
        'elasticsearch_base_settings',
        default='%s/mappings/socorro_index_settings.json' % DIRECTORY,
        doc='the file containing the mapping of the indexes receiving '
        'crash reports',
        reference_value_from='resource.elasticsearch',
    )
    required_config.add_option(
        'elasticsearch_emails_index_settings',
        default='%s/mappings/socorro_emails_index_settings.json' % DIRECTORY,
        doc='the file containing the mapping of the indexes receiving '
        'email addresses for the automatic-emails cron job',
        reference_value_from='resource.elasticsearch',
    )
    required_config.add_option(
        'elasticsearch_emails_index',
        default='socorro_emails',
        doc='the index that handles data about email addresses for '
        'the automatic-emails cron job',
        reference_value_from='resource.elasticsearch',
    )
    required_config.add_option(
        'use_mapping_file',
        default=True,
        doc='load the mapping from a file if true, load it from the database '
        'otherwise',
        reference_value_from='resource.elasticsearch',
    )

    operational_exceptions = (pyelasticsearch.exceptions.ConnectionError,
                              pyelasticsearch.exceptions.Timeout)

    conditional_exceptions = ()

    indices_cache = set()

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        super(ElasticSearchCrashStorage,
              self).__init__(config, quit_check_callback)
        self.transaction = config.transaction_executor_class(
            config, self, quit_check_callback)
        if self.config.elasticsearch_urls:
            self.es = pyelasticsearch.ElasticSearch(
                self.config.elasticsearch_urls,
                timeout=self.config.elasticsearch_timeout)
        else:
            config.logger.warning('elasticsearch crash storage is disabled.')

    #--------------------------------------------------------------------------
    def save_processed(self, processed_crash):
        crash_id = processed_crash['uuid']
        crash_document = {
            'crash_id': crash_id,
            'processed_crash': processed_crash,
            'raw_crash': None
        }
        try:
            # Why is the function specified as unbound?  The elastic search
            # crashstorage class serves as its own connection context object.
            # In other words, it has no actual connection class.  The
            # transaction executor passes a connection object as the first
            # parameter to the function that it calls.  That means that it will
            # be passing the ElasticSearchCrashStorage instance as the self
            # parameter.  A bound function would already have that input
            # parameter and thus an exception would be raised. By using an
            # unbound function, we avoid this problem.
            self.transaction(self.__class__._submit_crash_to_elasticsearch,
                             crash_id, crash_document)
        except KeyError, x:
            if x == 'uuid':
                raise CrashIDNotFound
            raise
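
The comment above about passing the function unbound can be demonstrated with a toy executor; every name below is invented, and the real TransactionExecutor classes add retry and quit-check logic on top:

class ToyExecutor(object):
    # a transaction executor calls fn(connection, *args); in the elasticsearch
    # crash storage case the "connection" it supplies is the storage object itself
    def __init__(self, connection_source):
        self.connection_source = connection_source

    def __call__(self, fn, *args):
        return fn(self.connection_source, *args)

class ToyStorage(object):
    def _submit(self, crash_id):
        return 'submitted %s' % crash_id

storage = ToyStorage()
executor = ToyExecutor(storage)
assert executor(ToyStorage._submit, 'abc123') == 'submitted abc123'
# executor(storage._submit, 'abc123') would pass `self` twice and raise TypeError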
Beispiel #37
0
    class ESBulkClassTemplate(base_class):
        required_config = Namespace()
        required_config.add_option(
            'items_per_bulk_load',
            default=500,
            doc="the number of crashes that triggers a flush to ES"
        )
        required_config.add_option(
            'maximum_queue_size',
            default=512,
            doc='the maximum size of the internal queue'
        )

        #----------------------------------------------------------------------
        def __init__(self, config, quit_check_callback=None):
            super(ESBulkClassTemplate, self).__init__(
                config,
                quit_check_callback
            )

            self.task_queue = QueueWrapper(config.maximum_queue_size)
            self.consuming_thread = Thread(
                name="ConsumingThread",
                target=self._consuming_thread_func
            )

            # overwrites original
            self.transaction = config.transaction_executor_class(
                config,
                QueueContextSource(self.task_queue),
                quit_check_callback
            )
            self.done = False
            self.consuming_thread.start()

        #----------------------------------------------------------------------
        def _submit_crash_to_elasticsearch(self, queue, crash_document):
            # Massage the crash such that the date_processed field is formatted
            # in the fashion of our established mapping.
            # First create a datetime object from the string in the crash
            # report.
            self.reconstitute_datetimes(crash_document['processed_crash'])

            # Obtain the index name.
            es_index = self.get_index_for_crash(
                crash_document['processed_crash']['date_processed']
            )
            es_doctype = self.config.elasticsearch.elasticsearch_doctype
            crash_id = crash_document['crash_id']

            # Attempt to create the index; it's OK if it already exists.
            if es_index not in self.indices_cache:
                index_creator = self.config.index_creator_class(
                    config=self.config
                )
                index_creator.create_socorro_index(es_index)

            action = {
                '_index': es_index,
                '_type': es_doctype,
                '_id': crash_id,
                '_source': crash_document,
            }
            queue.put(action)

        #----------------------------------------------------------------------
        def _consumer_iter(self):
            while True:
                try:
                    crash_document = self.task_queue.get()
                except Exception:
                    self.config.logger.critical(
                        "Failure in ES Bulktask_queue",
                        exc_info=True
                    )
                    crash_document = None
                if crash_document is None:
                    self.done = True
                    break
                yield crash_document  # execute the task

        #----------------------------------------------------------------------
        def close(self):
            self.task_queue.put(None)
            self.consuming_thread.join()

        #----------------------------------------------------------------------
        def _consuming_thread_func(self):  # execute the bulk load
            with self.es_context() as es:
                try:
                    elasticsearch.helpers.bulk(
                        es,
                        self._consumer_iter(),
                        chunk_size=self.config.items_per_bulk_load
                    )
                except Exception:
                    self.config.logger.critical(
                        "Failure in ES elasticsearch.helpers.bulk",
                        exc_info=True
                    )
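
The queue-plus-generator pattern above, with None as the shutdown sentinel that close() puts on the queue, reduces to a few lines; this sketch replaces elasticsearch.helpers.bulk with a plain loop and uses invented names:

try:
    import queue               # Python 3
except ImportError:
    import Queue as queue      # Python 2

task_queue = queue.Queue()
indexed = []

def consumer_iter():
    while True:
        doc = task_queue.get()
        if doc is None:        # sentinel: close() put None on the queue
            break
        yield doc

task_queue.put({'_id': 'crash-1'})
task_queue.put({'_id': 'crash-2'})
task_queue.put(None)

for action in consumer_iter():  # stands in for elasticsearch.helpers.bulk
    indexed.append(action['_id'])

assert indexed == ['crash-1', 'crash-2']
Beispiel #38
0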
class FetchADIFromHiveCronApp(BaseCronApp):
    """ This cron is our daily blocklist ping web logs query
        that rolls up all the browser checkins and lets us know
        how many browsers we think were active on the internet
        for a particular day """
    app_name = 'fetch-adi-from-hive'
    app_description = 'Fetch ADI From Hive App'
    app_version = '0.1'

    required_config = Namespace()
    required_config.add_option(
        'query',
        default=_QUERY,
        doc='Hive query for fetching ADI data')

    required_config.add_option(
        'hive_host',
        default='localhost',
        doc='Hostname to run Hive query on')

    required_config.add_option(
        'hive_port',
        default=10000,
        doc='Port to run Hive query on')

    required_config.add_option(
        'hive_user',
        default='socorro',
        doc='User to connect to Hive with')

    required_config.add_option(
        'hive_password',
        default='ignored',
        doc='Password to connect to Hive with')

    required_config.add_option(
        'hive_database',
        default='default',
        doc='Database name to connect to Hive with')

    required_config.add_option(
        'hive_auth_mechanism',
        default='PLAIN',
        doc='Auth mechanism for Hive')

    required_config.add_option(
        'timeout',
        default=30 * 60,  # 30 minutes
        doc='number of seconds to wait before timing out')

    @staticmethod
    def remove_control_characters(s):
        if isinstance(s, str):
            s = unicode(s, 'utf-8', errors='replace')
        return ''.join(c for c in s if unicodedata.category(c)[0] != "C")

    def run(self, connection, date):
        target_date = (date - datetime.timedelta(days=1)).strftime('%Y-%m-%d')

        raw_adi_logs_pathname = os.path.join(
            tempfile.gettempdir(),
            "%s.raw_adi_logs.TEMPORARY%s" % (
                target_date,
                '.txt'
            )
        )
        try:
            with codecs.open(raw_adi_logs_pathname, 'w', 'utf-8') as f:
                hive = pyhs2.connect(
                    host=self.config.hive_host,
                    port=self.config.hive_port,
                    authMechanism=self.config.hive_auth_mechanism,
                    user=self.config.hive_user,
                    password=self.config.hive_password,
                    database=self.config.hive_database,
                    # the underlying TSocket setTimeout() wants milliseconds
                    timeout=self.config.timeout * 1000
                )

                cur = hive.cursor()
                query = self.config.query % target_date
                cur.execute(query)
                for row in cur:
                    if None in row:
                        continue
                    f.write(
                        "\t"
                        .join(
                            self.remove_control_characters(
                                urllib2.unquote(v)
                            ).replace('\\', '\\\\')
                            if isinstance(v, basestring) else str(v)
                            for v in row
                        )
                    )
                    f.write("\n")

            with codecs.open(raw_adi_logs_pathname, 'r', 'utf-8') as f:
                pgcursor = connection.cursor()
                pgcursor.copy_from(
                    f,
                    'raw_adi_logs',
                    null='None',
                    columns=[
                        'report_date',
                        'product_name',
                        'product_os_platform',
                        'product_os_version',
                        'product_version',
                        'build',
                        'build_channel',
                        'product_guid',
                        'count'
                    ]
                )
                pgcursor.execute(_RAW_ADI_QUERY, (target_date,))
        finally:
            if os.path.isfile(raw_adi_logs_pathname):
                os.remove(raw_adi_logs_pathname)
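
remove_control_characters() drops every code point whose Unicode category starts with 'C' (Cc, Cf, and so on), which keeps stray tabs or newlines inside Hive values from corrupting the tab-separated staging file. A standalone check of that behaviour; the helper below just repeats the predicate from the static method:

import unicodedata

def remove_control_characters(s):
    # same predicate as the static method above
    return ''.join(c for c in s if unicodedata.category(c)[0] != 'C')

assert remove_control_characters(u'Firefox\t42.0\n') == u'Firefox42.0'
Beispiel #39
0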
class BetaVersionRule(Rule):
    required_config = Namespace()
    required_config.add_option(
        'database_class',
        doc="the class of the database",
        default='socorro.external.postgresql.connection_context.'
        'ConnectionContext',
        from_string_converter=str_to_python_object,
        reference_value_from='resource.postgresql',
    )
    required_config.add_option(
        'transaction_executor_class',
        default="socorro.database.transaction_executor."
        "TransactionExecutorWithInfiniteBackoff",
        doc='a class that will manage transactions',
        from_string_converter=str_to_python_object,
        reference_value_from='resource.postgresql',
    )

    def __init__(self, config):
        super(BetaVersionRule, self).__init__(config)
        database = config.database_class(config)
        self.transaction = config.transaction_executor_class(
            config,
            database,
        )
        self._versions_data_cache = {}

    def version(self):
        return '1.0'

    def _get_version_data(self, product, version, build_id):
        """Return the real version number of a specific product, version and
        build.

        For example, beta builds of Firefox declare their version
        number as the major version (i.e. version 54.0b3 would say its version
        is 54.0). This database call returns the actual version number of said
        build (i.e. 54.0b3 for the previous example).
        """
        key = '%s:%s:%s' % (product, version, build_id)

        if key in self._versions_data_cache:
            return self._versions_data_cache[key]

        sql = """
            SELECT
                pv.version_string
            FROM product_versions pv
                LEFT JOIN product_version_builds pvb ON
                    (pv.product_version_id = pvb.product_version_id)
            WHERE pv.product_name = %(product)s
            AND pv.release_version = %(version)s
            AND pvb.build_id = %(build_id)s
        """
        params = {
            'product': product,
            'version': version,
            'build_id': build_id,
        }
        results = self.transaction(execute_query_fetchall, sql, params)
        for real_version, in results:
            self._versions_data_cache[key] = real_version

        return self._versions_data_cache.get(key)

    def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        try:
            # We apply this Rule only if the release channel is beta, because
            # beta versions are the only ones sending an "incorrect" version
            # number in their data.
            # 2017-06-14: Ohai! This is not true anymore! With the removal of
            # the aurora channel, there is now a new type of build called
            # "DevEdition", that is released on the aurora channel, but has
            # the same version naming logic as builds on the beta channel.
            # We thus want to apply the same logic to aurora builds
            # as well now. Note that older crash reports won't be affected,
            # because they have a "correct" version number, usually containing
            # the letter 'a' (like '50.0a2').
            return processed_crash['release_channel'].lower() in ('beta',
                                                                  'aurora')
        except KeyError:
            # No release_channel.
            return False

    def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        try:
            # Sanitize the build id to avoid errors during the SQL query.
            try:
                build_id = int(processed_crash['build'])
            except ValueError:
                build_id = None

            real_version = self._get_version_data(
                processed_crash['product'],
                processed_crash['version'],
                build_id,
            )

            if real_version:
                processed_crash['version'] = real_version
            else:
                # This is a beta version but we do not have data about it. It
                # could be because we don't have it yet (if the cron jobs are
                # running late for example), so we mark this crash. This way,
                # we can reprocess it later to give it the correct version.
                processed_crash['version'] += 'b0'
                processor_meta.processor_notes.append(
                    'release channel is %s but no version data was found '
                    '- added "b0" suffix to version number' %
                    (processed_crash['release_channel'], ))
        except KeyError:
            return False
        return True
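
How the 'b0' fallback in _action() plays out can be seen with a dictionary standing in for the product_versions/product_version_builds lookup; the table contents below are made up:

# hypothetical lookup: (product, release_version, build_id) -> version_string
versions = {('Firefox', '54.0', 20170612000000): '54.0b3'}

def resolve(product, version, build_id):
    return versions.get((product, version, build_id))

crash = {'product': 'Firefox', 'version': '54.0',
         'build': '20170699000000', 'release_channel': 'beta'}
real_version = resolve(crash['product'], crash['version'], int(crash['build']))
crash['version'] = real_version if real_version else crash['version'] + 'b0'
assert crash['version'] == '54.0b0'  # no version data yet, marked for reprocessing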
Beispiel #40
0
    def test_programming_error_with_postgres_with_backoff_with_rollback(self):
        required_config = Namespace()
        required_config.add_option(
            'transaction_executor_class',
            default=TransactionExecutorWithInfiniteBackoff,
            doc='a class that will execute transactions')
        required_config.add_option('database_class',
                                   default=MockConnectionContext,
                                   from_string_converter=class_converter)

        mock_logging = MockLogging()
        required_config.add_option('logger', default=mock_logging)

        config_manager = ConfigurationManager(
            [required_config],
            app_name='testapp',
            app_version='1.0',
            app_description='app description',
            values_source_list=[{
                'backoff_delays': [2, 4, 6, 10, 15]
            }],
            argv_source=[])
        with config_manager.context() as config:
            mocked_context = config.database_class(config)
            executor = config.transaction_executor_class(
                config, mocked_context)
            _function_calls = []  # some mutable

            _sleep_count = []

            def mock_function_struggling(connection):
                assert isinstance(connection, MockConnection)
                connection.transaction_status = psycopg2.extensions.TRANSACTION_STATUS_INTRANS
                _function_calls.append(connection)
                # the default sleep times are going to be,
                # 2, 4, 6, 10, 15
                # so after 2 + 4 + 6 + 10 + 15 seconds
                # all will be exhausted
                if sum(_sleep_count) < sum([2, 4, 6, 10, 15]):
                    raise psycopg2.ProgrammingError(
                        'SSL SYSCALL error: EOF detected')

            def mock_sleep(n):
                _sleep_count.append(n)

            # monkey patch the sleep function from inside transaction_executor
            _orig_sleep = socorro.database.transaction_executor.time.sleep
            socorro.database.transaction_executor.time.sleep = mock_sleep

            try:
                executor(mock_function_struggling)
                assert _function_calls
                assert commit_count == 1
                assert rollback_count == 5
                assert mock_logging.criticals
                assert len(mock_logging.criticals) == 5
                assert len(_sleep_count) > 10
            finally:
                socorro.database.transaction_executor.time.sleep = _orig_sleep

        # this time, simulate an actual code bug where a callable function
        # raises a ProgrammingError() exception by, for example, a syntax error
        with config_manager.context() as config:
            mocked_context = config.database_class(config)
            executor = config.transaction_executor_class(
                config, mocked_context)

            def mock_function_developer_mistake(connection):
                assert isinstance(connection, MockConnection)
                connection.transaction_status = psycopg2.extensions.TRANSACTION_STATUS_INTRANS
                raise psycopg2.ProgrammingError("syntax error")

            with pytest.raises(psycopg2.ProgrammingError):
                executor(mock_function_developer_mistake)
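
The retry behaviour this test depends on, re-running the callable after each delay in backoff_delays until it stops raising, can be sketched without psycopg2. This is an assumed shape, not the actual TransactionExecutorWithInfiniteBackoff, which also inspects the exception (as the two halves of the test show) before deciding whether retrying makes sense:

import time

def run_with_backoff(fn, delays=(2, 4, 6, 10, 15), sleep=time.sleep):
    # retry after each configured delay; once the delays are exhausted,
    # make one final attempt and let its exception propagate
    for delay in delays:
        try:
            return fn()
        except Exception:
            sleep(delay)
    return fn()

calls = []
def flaky():
    calls.append(1)
    if len(calls) < 3:
        raise RuntimeError('SSL SYSCALL error: EOF detected')
    return 'ok'

assert run_with_backoff(flaky, sleep=lambda seconds: None) == 'ok'
assert len(calls) == 3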
Beispiel #41
0
class ConnectionContext(RequiredConfig):
    """Postgres Connection Context"""
    required_config = Namespace()
    required_config.add_option(
        name='database_hostname',
        default=get_field_from_pg_database_url('hostname', 'localhost'),
        doc='the hostname of the database',
        reference_value_from='resource.postgresql',
    )
    required_config.add_option(
        name='database_name',
        default=get_field_from_pg_database_url('path', ' breakpad')[1:],
        doc='the name of the database',
        reference_value_from='resource.postgresql',
    )
    required_config.add_option(
        name='database_port',
        default=get_field_from_pg_database_url('port', 5432),
        doc='the port for the database',
        reference_value_from='resource.postgresql',
    )
    required_config.add_option(
        name='database_username',
        default=get_field_from_pg_database_url('username', 'breakpad_rw'),
        doc='the name of the user within the database',
        reference_value_from='secrets.postgresql',
    )
    required_config.add_option(
        name='database_password',
        default=get_field_from_pg_database_url('password', 'aPassword'),
        doc="the user's database password",
        reference_value_from='secrets.postgresql',
        secret=True,
    )

    RETRYABLE_EXCEPTIONS = (psycopg2.InterfaceError, socket.timeout)

    def __init__(self, config, local_config=None):
        """Initialize the parts needed to start making database connections

        parameters:
            config - the complete config for the app.  If a real app, this
                     would be where a logger or other resources could be
                     found.
            local_config - this is the namespace within the complete config
                           where the actual database parameters are found

        """
        super().__init__()
        self.config = config
        self.logger = logging.getLogger(__name__ + '.' +
                                        self.__class__.__name__)
        if local_config is None:
            local_config = config
        if local_config['database_port'] is None:
            local_config['database_port'] = 5432
        self.dsn = ("host=%(database_hostname)s "
                    "dbname=%(database_name)s "
                    "port=%(database_port)s "
                    "user=%(database_username)s "
                    "password=%(database_password)s") % local_config

    def connection(self, name_unused=None):
        return psycopg2.connect(self.dsn)

    @contextlib.contextmanager
    def __call__(self, name=None):
        """returns a database connection wrapped in a contextmanager.

        The context manager will assure that the connection is closed but will
        not try to commit or rollback lingering transactions.

        parameters:
            name - an optional name for the database connection
        """
        conn = self.connection(name)
        try:
            yield conn
        finally:
            self.close_connection(conn)

    def close_connection(self, connection, force=False):
        """close the connection passed in.

        This function exists to allow derived classes to override the closing
        behavior.

        parameters:
            connection - the database connection object
            force - unused boolean to force closure; used in derived classes
        """
        connection.close()

    def close(self):
        pass

    def is_retryable_exception(self, exc):
        """Return True if this is a retryable exception"""
        message = exc.args[0]
        if message in ('SSL SYSCALL error: EOF detected', ):
            # Ideally we'd like to check against exc.pgcode values
            # but certain odd ProgrammingError exceptions don't have
            # pgcodes so we have to rely on reading the pgerror :(
            return True

        if isinstance(
                exc, psycopg2.OperationalError) and message != 'out of memory':
            return True

        return isinstance(exc, self.RETRYABLE_EXCEPTIONS)

    def force_reconnect(self):
        pass
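
The DSN built in __init__ is plain %-interpolation over the option names above; with the fallback values shown (used when no database URL is set in the environment) it expands like this:

local_config = {
    'database_hostname': 'localhost',
    'database_name': 'breakpad',
    'database_port': 5432,
    'database_username': 'breakpad_rw',
    'database_password': 'aPassword',
}
dsn = ("host=%(database_hostname)s "
       "dbname=%(database_name)s "
       "port=%(database_port)s "
       "user=%(database_username)s "
       "password=%(database_password)s") % local_config
assert dsn == ('host=localhost dbname=breakpad port=5432 '
               'user=breakpad_rw password=aPassword')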
Beispiel #42
0
class FileSystemThrottledCrashStorage(FileSystemRawCrashStorage):
    """This varient of file system storage segregates crashes based on
    the result of Collector throttling.  When the collector recieves a crash,
    it applies throttle rules and saves the result in the crash json under the
    key 'legacy_processing'.  Only crashes that have a value of 0 in that field
    will eventually make it on to processing.
        legacy_processing == 0 : crashes stored in the filesystem rooted at
                                 'std_fs_root' (standard file system storage)
                                 defined in the parent class
        legacy_processing == 1 : crashes stored in the filesystem rooted at
                                 'def_fs_root' (deferred file system storage)
                                 defined in this class
    This class only implements raw crash storage and is not appropriate for
    storing processed crashes."""

    required_config = Namespace()
    required_config.add_option(
        'def_fs_root',
        doc='a path to a local file system',
        default='./deferredCrashStore',
        reference_value_from='resource.filesystem',
    )

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        super(FileSystemThrottledCrashStorage, self).__init__(config)

        self.def_crash_store = JsonDumpStorage(
            root=config.def_fs_root,
            maxDirectoryEntries=config.dump_dir_count,
            jsonSuffix=config.json_file_suffix,
            dumpSuffix=config.dump_file_suffix,
            dumpGID=config.dump_gid,
            dumpPermissions=config.dump_permissions,
            dirPermissions=config.dir_permissions,
            logger=config.logger)
        self._crash_store_tuple = (self.std_crash_store, self.def_crash_store)

    #--------------------------------------------------------------------------
    def save_raw_crash(self, raw_crash, dump, crash_id):
        """save the raw crash and the dump in the appropriate file system
        based on the value of 'legacy_processing' within the raw_crash itself"""
        try:
            if raw_crash['legacy_processing'] == ACCEPT:
                self._do_save_raw(self.std_crash_store, raw_crash, dump,
                                  crash_id)
            else:
                self._do_save_raw(self.def_crash_store, raw_crash, dump,
                                  crash_id)
        except KeyError:
            # if 'legacy_processing' is missing, then it is assumed that this
            # crash should be processed.  Therefore save it into standard
            # storage
            self._do_save_raw(self.std_crash_store, raw_crash, dump, crash_id)

    #--------------------------------------------------------------------------
    def get_raw_crash(self, crash_id):
        """fetch the raw_crash trying each file system in turn"""
        for a_crash_store in self._crash_store_tuple:
            try:
                pathname = a_crash_store.getJson(crash_id)
                return self._load_raw_crash_from_file(pathname)
            except OSError:
                # only raise the exception if we've got no more file systems
                # to look through
                if a_crash_store is self._crash_store_tuple[-1]:
                    raise CrashIDNotFound(crash_id)

    #--------------------------------------------------------------------------
    def get_raw_dump(self, crash_id, dump_name=None):
        """fetch the dump trying each file system in turn"""
        for a_crash_store in self._crash_store_tuple:
            try:
                job_pathname = a_crash_store.getDump(crash_id, dump_name)
                with open(job_pathname) as dump_file:
                    dump = dump_file.read()
                return dump
            except OSError:
                # only raise the exception if we've got no more file systems
                # to look through
                if a_crash_store is self._crash_store_tuple[-1]:
                    raise CrashIDNotFound(crash_id)

    #--------------------------------------------------------------------------
    def get_raw_dumps(self, crash_id):
        """fetch the dump trying each file system in turn"""
        for a_crash_store in self._crash_store_tuple:
            try:
                return self._do_get_raw_dumps(crash_id, a_crash_store)
            except CrashIDNotFound:
                pass  # try the next crash store
        raise CrashIDNotFound(crash_id)

    #--------------------------------------------------------------------------
    def get_raw_dumps_as_files(self, crash_id):
        """fetch the dump trying each file system in turn"""
        for a_crash_store in self._crash_store_tuple:
            try:
                return a_crash_store.get_dumps(crash_id)
            except CrashIDNotFound:
                pass  # try the next crash store
        raise CrashIDNotFound(crash_id)

    #--------------------------------------------------------------------------
    def remove(self, crash_id):
        """try to remove the raw_crash and the dump from each  """
        for a_crash_store in self._crash_store_tuple:
            try:
                a_crash_store.remove(crash_id)  # raises NoSuchUuidFound if
                # unsuccessful.
                return  # break the loop as soon as we succeed
            except (NoSuchUuidFound, OSError):
                # only raise the exception if we've got no more file systems
                # to look through
                if a_crash_store is self._crash_store_tuple[-1]:
                    raise CrashIDNotFound(crash_id)
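
The fall-through used by the get_* methods above, try each store and only raise once the last one has failed, looks like this in isolation; the dictionaries and the locally defined CrashIDNotFound stand in for the real JsonDumpStorage objects and exception:

class CrashIDNotFound(Exception):
    pass

def get_from_first_store(stores, crash_id):
    for a_crash_store in stores:
        try:
            return a_crash_store[crash_id]
        except KeyError:
            # only raise once there are no more stores left to try
            if a_crash_store is stores[-1]:
                raise CrashIDNotFound(crash_id)

std_store, def_store = {'accepted': 'raw1'}, {'throttled': 'raw2'}
assert get_from_first_store((std_store, def_store), 'throttled') == 'raw2'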
Beispiel #43
0
class FileSystemRawCrashStorage(CrashStorageBase):
    """This crash storage class impements only the raw crash part of the
    api.  Raw crashes (the json file and the binary dump) are stored in a
    file system.  This class is appropriate for fast storage of crashes into
    a local file system.  In 2011, a variant of this code base was adopted
    by the Socorro Collector for fast temporary storage as crashes came in."""

    required_config = Namespace()
    required_config.add_option(
        'std_fs_root',
        doc='a path to a local file system',
        default='./primaryCrashStore',
        reference_value_from='resource.filesystem',
    )
    required_config.add_option(
        'dump_dir_count',
        doc='the number of dumps to be stored in a single directory in the '
        'local file system',
        default=1024,
        reference_value_from='resource.filesystem',
    )
    required_config.add_option(
        'dump_gid',
        doc='the group ID for saved crashes in local file system (optional)',
        default='',
        reference_value_from='resource.filesystem',
    )
    required_config.add_option(
        'dump_permissions',
        doc='a number used for permissions for crash dump files in the local '
        'file system',
        default=stat.S_IRGRP | stat.S_IWGRP | stat.S_IRUSR | stat.S_IWUSR,
        reference_value_from='resource.filesystem',
    )
    required_config.add_option(
        'dir_permissions',
        doc='a number used for permissions for directories in the local '
        'file system',
        default=(stat.S_IRGRP | stat.S_IXGRP | stat.S_IWGRP | stat.S_IRUSR
                 | stat.S_IXUSR | stat.S_IWUSR),
        reference_value_from='resource.filesystem',
    )
    required_config.add_option(
        'json_file_suffix',
        doc='the suffix used to identify a json file',
        default='.json',
        reference_value_from='resource.filesystem',
    )
    required_config.add_option(
        'dump_file_suffix',
        doc='the suffix used to identify a dump file',
        default='.dump',
        reference_value_from='resource.filesystem',
    )

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        super(FileSystemRawCrashStorage, self).__init__(config)
        self.std_crash_store = JsonDumpStorage(
            root=config.std_fs_root,
            maxDirectoryEntries=config.dump_dir_count,
            jsonSuffix=config.json_file_suffix,
            dumpSuffix=config.dump_file_suffix,
            dumpGID=config.dump_gid,
            dumpPermissions=config.dump_permissions,
            dirPermissions=config.dir_permissions,
            logger=config.logger)
        self.hostname = os.uname()[1]

    #--------------------------------------------------------------------------
    def _load_raw_crash_from_file(self, pathname):
        with open(pathname) as json_file:
            raw_crash = json.load(json_file, object_hook=DotDict)
        return raw_crash

    #--------------------------------------------------------------------------
    def _do_save_raw(self, json_storage_system, raw_crash, dumps, crash_id):
        json_storage_system.new_entry(crash_id, raw_crash, dumps,
                                      self.hostname)

    #--------------------------------------------------------------------------
    def save_raw_crash(self, raw_crash, dumps, crash_id):
        """forward the raw_crash and the dump to the underlying file system"""
        self._do_save_raw(self.std_crash_store, raw_crash, dumps, crash_id)

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        """ bug 866973 - do not try to save dumps=None into the Filesystem
            We are doing this in lieu of a queuing solution that could allow
            us to operate an independent crashmover. When the queuing system
            is implemented, we could remove this, and have the raw crash
            saved by a crashmover that's consuming crash_ids the same way
            that the processor consumes them.

            Even though it is ok to resave the raw_crash in this case to the
            filesystem, the fs does not know what to do with a dumps=None
            when passed to save_raw, so we are going to avoid that.
        """
        self.save_processed(processed_crash)

    #--------------------------------------------------------------------------
    def get_raw_crash(self, crash_id):
        """fetch the raw crash from the underlying file system"""
        try:
            pathname = self.std_crash_store.getJson(crash_id)
            return self._load_raw_crash_from_file(pathname)
        except OSError:
            raise CrashIDNotFound(crash_id)
        except ValueError:  # empty json file?
            return DotDict()

    #--------------------------------------------------------------------------
    def get_raw_dump(self, crash_id, dump_name=None):
        """read the binary crash dump from the underlying file system by
        getting the pathname and then opening and reading the file."""
        try:
            job_pathname = self.std_crash_store.getDump(crash_id, dump_name)
            with open(job_pathname) as dump_file:
                binary = dump_file.read()
            return binary
        except OSError:
            raise CrashIDNotFound(crash_id)

    #--------------------------------------------------------------------------
    def _do_get_raw_dumps(self, crash_id, crash_store):
        try:
            dumpname_paths_map = crash_store.get_dumps(crash_id)
            dumpname_dump_map = {}
            for dump_name, dump_pathname in dumpname_paths_map.iteritems():
                with open(dump_pathname, 'rb') as f:
                    dumpname_dump_map[dump_name] = f.read()
            return dumpname_dump_map
        except OSError:
            raise CrashIDNotFound(crash_id)

    #--------------------------------------------------------------------------
    def get_raw_dumps(self, crash_id):
        """read the all the binary crash dumps from the underlying file system
        by getting the pathnames and then opening and reading the files.
        returns a dict of dump names to binary dumps"""
        return self._do_get_raw_dumps(crash_id, self.std_crash_store)

    #--------------------------------------------------------------------------
    def get_raw_dumps_as_files(self, crash_id):
        """read the all the binary crash dumps from the underlying file system
        by getting the pathnames and then opening and reading the files.
        returns a dict of dump names to binary dumps"""
        return self.std_crash_store.get_dumps(crash_id)

    #--------------------------------------------------------------------------
    def new_crashes(self):
        """return an iterator that yields a list of crash_ids of raw crashes
        that were added to the file system since the last time this iterator
        was requested."""
        # why is this called 'destructiveDateWalk'?  The underlying code
        # that manages the filesystem uses a tree of radix date directories
        # and symbolic links to track "new" raw crashes.  As the crash_ids
        # are fetched from the file system, the symbolic links are removed and
        # directories are deleted.  Essentially, the state of what is
        # considered to be new is saved within the file system by those links.
        return self.std_crash_store.destructiveDateWalk()

    #--------------------------------------------------------------------------
    def remove(self, crash_id):
        """delegate removal of a raw crash to the underlying filesystem"""
        try:
            self.std_crash_store.quickDelete(crash_id)
        except NoSuchUuidFound:
            raise CrashIDNotFound(crash_id)
Beispiel #44
0
class SubmitterApp(FetchTransformSaveWithSeparateNewCrashSourceApp):
    app_name = 'submitter_app'
    app_version = '3.1'
    app_description = __doc__

    required_config = Namespace()
    required_config.namespace('submitter')
    required_config.submitter.add_option(
        'delay',
        doc="pause between submission queuing in milliseconds",
        default='0',
        from_string_converter=lambda x: float(x) / 1000.0
    )
    required_config.submitter.add_option(
        'dry_run',
        doc="don't actually submit, just print product/version from raw crash",
        short_form='D',
        default=False
    )

    #--------------------------------------------------------------------------
    @staticmethod
    def get_application_defaults():
        return {
            "source.crashstorage_class": SubmitterFileSystemWalkerSource,
            "destination.crashstorage_class":
                'socorro.collector.breakpad_submitter_utilities'
                '.BreakpadPOSTDestination',
            "number_of_submissions": "all",
        }

    #--------------------------------------------------------------------------
    def _action_between_each_iteration(self):
        if self.config.submitter.delay:
            time.sleep(self.config.submitter.delay)

    #--------------------------------------------------------------------------
    def _action_after_iteration_completes(self):
        self.config.logger.info(
            'the queuing iterator is exhausted - waiting to quit'
        )
        self.task_manager.wait_for_empty_queue(
            5,
            "waiting for the queue to drain before quitting"
        )
        time.sleep(self.config.producer_consumer.number_of_threads * 2)

    #--------------------------------------------------------------------------
    def _filter_disallowed_values(self, current_value):
        """in this base class there are no disallowed values coming from the
        iterators.  Other users of these iterators may have some standards and
        can detect and reject them here"""
        return current_value is None

    #--------------------------------------------------------------------------
    def _transform(self, crash_id):
        """this transform function only transfers raw data from the
        source to the destination without changing the data."""
        if self.config.submitter.dry_run:
            print crash_id
        else:
            raw_crash = self.source.get_raw_crash(crash_id)
            dumps = self.source.get_raw_dumps_as_files(crash_id)
            self.destination.save_raw_crash_with_file_dumps(
                raw_crash,
                dumps,
                crash_id
            )
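
Note that the submitter.delay option above is entered in milliseconds but stored in seconds thanks to its from_string_converter, so the time.sleep() call in _action_between_each_iteration gets the unit it expects:

to_seconds = lambda milliseconds: float(milliseconds) / 1000.0
assert to_seconds('250') == 0.25  # '250' ms on the command line becomes a 0.25 s pause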
Beispiel #45
0
class PostgreSQLCrashStorage(CrashStorageBase):
    """this implementation of crashstorage saves processed crashes to
    an instance of Postgresql.  It only saves certain key values to the
    partitioned reports table, therefore it is not a source for fetching
    complete processed reports and does not implement any of the 'get'
    methods."""

    required_config = Namespace()

    required_config.add_option(
        'transaction_executor_class',
        default="socorro.database.transaction_executor."
        "TransactionExecutorWithInfiniteBackoff",
        doc='a class that will manage transactions',
        from_string_converter=class_converter,
        reference_value_from='resource.postgresql',
    )
    required_config.add_option(
        'database_class',
        default=ConnectionContext,
        doc='the class responsible for connecting to Postgres',
        reference_value_from='resource.postgresql',
    )

    _reports_table_mappings = (
        # processed name, reports table name
        ("addons_checked", "addons_checked"),
        ("address", "address"),
        ("app_notes", "app_notes"),
        ("build", "build"),
        ("client_crash_date", "client_crash_date"),
        ("completeddatetime", "completed_datetime"),
        ("cpu_info", "cpu_info"),
        ("cpu_name", "cpu_name"),
        ("date_processed", "date_processed"),
        ("distributor", "distributor"),
        ("distributor_version", "distributor_version"),
        ("email", "email"),
        ("exploitability", "exploitability"),
        # ("flash_process_dump", "flash_process_dump"),  # future
        ("flash_version", "flash_version"),
        ("hangid", "hangid"),
        ("install_age", "install_age"),
        ("last_crash", "last_crash"),
        ("os_name", "os_name"),
        ("os_version", "os_version"),
        ("processor_notes", "processor_notes"),
        ("process_type", "process_type"),
        ("product", "product"),
        ("productid", "productid"),
        ("reason", "reason"),
        ("release_channel", "release_channel"),
        ("signature", "signature"),
        ("startedDateTime", "started_datetime"),
        ("success", "success"),
        ("topmost_filenames", "topmost_filenames"),
        ("truncated", "truncated"),
        ("uptime", "uptime"),
        ("user_comments", "user_comments"),
        ("user_id", "user_id"),
        ("url", "url"),
        ("uuid", "uuid"),
        ("version", "version"),
    )

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        super(PostgreSQLCrashStorage,
              self).__init__(config, quit_check_callback=quit_check_callback)
        self.database = config.database_class(config)
        self.transaction = config.transaction_executor_class(
            config, self.database, quit_check_callback=quit_check_callback)

    #--------------------------------------------------------------------------
    def save_raw_crash(self, raw_crash, dumps, crash_id):
        """nota bene: this function does not save the dumps in PG, only
        the raw crash json is saved."""
        self.transaction(self._save_raw_crash_transaction, raw_crash, crash_id)

    #-------------------------------------------------------------------------
    def _save_raw_crash_transaction(self, connection, raw_crash, crash_id):
        raw_crash_table_name = ('raw_crashes_%s' %
                                self._table_suffix_for_crash_id(crash_id))

        upsert_sql = """
        WITH
        update_raw_crash AS (
            UPDATE %(table)s SET
                raw_crash = %%(raw_crash)s,
                date_processed = %%(date_processed)s
            WHERE uuid = %%(crash_id)s
            RETURNING 1
        ),
        insert_raw_crash AS (
            INSERT into %(table)s (uuid, raw_crash, date_processed)
            ( SELECT
                %%(crash_id)s as uuid,
                %%(raw_crash)s as raw_crash,
                %%(date_processed)s as date_processed
                WHERE NOT EXISTS (
                    SELECT uuid from %(table)s
                    WHERE
                        uuid = %%(crash_id)s
                    LIMIT 1
                )
            )
            RETURNING 2
        )
        SELECT * from update_raw_crash
        UNION ALL
        SELECT * from insert_raw_crash
        """ % {
            'table': raw_crash_table_name
        }

        values = {
            'crash_id': crash_id,
            'raw_crash': json.dumps(raw_crash),
            'date_processed': raw_crash["submitted_timestamp"]
        }
        execute_no_results(connection, upsert_sql, values)

    #--------------------------------------------------------------------------
    def get_raw_crash(self, crash_id):
        """the default implementation of fetching a raw_crash

        parameters:
           crash_id - the id of a raw crash to fetch"""
        return self.transaction(self._get_raw_crash_transaction, crash_id)

    #--------------------------------------------------------------------------
    def _get_raw_crash_transaction(self, connection, crash_id):
        raw_crash_table_name = ('raw_crash_%s' %
                                self._table_suffix_for_crash_id(crash_id))
        fetch_sql = 'select raw_crash from %s where uuid = %%s' % \
                    raw_crash_table_name
        try:
            return single_value_sql(connection, fetch_sql, (crash_id, ))
        except SQLDidNotReturnSingleValue:
            raise CrashIDNotFound(crash_id)

    #--------------------------------------------------------------------------
    def save_processed(self, processed_crash):
        self.transaction(self._save_processed_transaction, processed_crash)

    #--------------------------------------------------------------------------
    def _save_processed_transaction(self, connection, processed_crash):
        report_id = self._save_processed_report(connection, processed_crash)
        self._save_plugins(connection, processed_crash, report_id)
        self._save_extensions(connection, processed_crash, report_id)
        self._save_processed_crash(connection, processed_crash)

    def _save_processed_crash(self, connection, processed_crash):
        crash_id = processed_crash['uuid']
        processed_crashes_table_name = (
            'processed_crashes_%s' % self._table_suffix_for_crash_id(crash_id))
        upsert_sql = """
        WITH
        update_processed_crash AS (
            UPDATE %(table)s SET
                processed_crash = %%(processed_json)s,
                date_processed = %%(date_processed)s
            WHERE uuid = %%(uuid)s
            RETURNING 1
        ),
        insert_processed_crash AS (
            INSERT INTO %(table)s (uuid, processed_crash, date_processed)
            ( SELECT
                %%(uuid)s as uuid,
                %%(processed_json)s as processed_crash,
                %%(date_processed)s as date_processed
                WHERE NOT EXISTS (
                    SELECT uuid from %(table)s
                    WHERE
                        uuid = %%(uuid)s
                    LIMIT 1
                )
            )
            RETURNING 2
        )
        SELECT * from update_processed_crash
        UNION ALL
        SELECT * from insert_processed_crash
        """ % {
            'table': processed_crashes_table_name,
            'uuid': crash_id
        }

        values = {
            'processed_json': json.dumps(processed_crash, cls=JsonDTEncoder),
            'date_processed': processed_crash["date_processed"],
            'uuid': crash_id
        }
        execute_no_results(connection, upsert_sql, values)

    #--------------------------------------------------------------------------
    def _save_processed_report(self, connection, processed_crash):
        """ Here we INSERT or UPDATE a row in the reports table.
        This is the first stop before imported data gets into our normalized
        batch reporting (next table: reports_clean).

        At some point in the future, we will switch to using the raw_crash
        table and JSON transforms instead. This work will require an overhaul
        and optimization of the update_reports_clean() and
        update_reports_duplicates() stored procedures.

        We perform an UPSERT using a PostgreSQL CTE (aka WITH clause) that
        first tests whether a row exists and performs an UPDATE if it can, or
        it performs an INSERT. Because we're using raw SQL in this function,
        we've got a substantial parameterized query that requires two sets of
        parameters to be passed in via value_list. The value_list ends up
        having an extra crash_id added to the list, and being doubled before
        being passed to single_value_sql().

        The SQL produced isn't beautiful, but that's a side effect of the CTE style of
        UPSERT-ing. We look forward to SQL UPSERT being adopted as a
        first-class citizen in PostgreSQL.

        Similar code is present for _save_raw_crash() and
        _save_processed_crash(), but it looks much simpler because there are
        far fewer columns being passed into the parameterized query.
        """
        column_list = []
        placeholder_list = []
        value_list = []
        for pro_crash_name, report_name in self._reports_table_mappings:
            column_list.append(report_name)
            placeholder_list.append('%s')
            value_list.append(processed_crash[pro_crash_name])

        def print_eq(a, b):
            # Helper for UPDATE SQL clause
            return a + ' = ' + b

        def print_as(a, b):
            # Helper for INSERT SQL clause
            return b + ' as ' + a

        crash_id = processed_crash['uuid']
        reports_table_name = ('reports_%s' %
                              self._table_suffix_for_crash_id(crash_id))
        upsert_sql = """
        WITH
        update_report AS (
            UPDATE %(table)s SET
                %(joined_update_clause)s
            WHERE uuid = %%s
            RETURNING id
        ),
        insert_report AS (
            INSERT INTO %(table)s (%(column_list)s)
            ( SELECT
                %(joined_select_clause)s
                WHERE NOT EXISTS (
                    SELECT uuid from %(table)s
                    WHERE
                        uuid = %%s
                    LIMIT 1
                )
            )
            RETURNING id
        )
        SELECT * from update_report
        UNION ALL
        SELECT * from insert_report
        """ % {
            'joined_update_clause':
            ", ".join(map(print_eq, column_list, placeholder_list)),
            'table':
            reports_table_name,
            'column_list':
            ', '.join(column_list),
            'joined_select_clause':
            ", ".join(map(print_as, column_list, placeholder_list)),
        }

        value_list.append(crash_id)
        value_list.extend(value_list)

        report_id = single_value_sql(connection, upsert_sql, value_list)
        return report_id

    #--------------------------------------------------------------------------
    def _save_plugins(self, connection, processed_crash, report_id):
        """ Electrolysis Support - Optional - processed_crash may contain a
        ProcessType of plugin. In the future this value would be default,
        content, maybe even Jetpack... This indicates which process was the
        crashing process.
            plugin - When set to plugin, the jsonDocument MUST also contain
                     PluginFilename, PluginName, and PluginVersion
        """
        process_type = processed_crash['process_type']
        if not process_type:
            return

        if process_type == "plugin":

            # Bug#543776 We actually are relaxing the non-null policy...
            # a null filename, name, and version is OK. We'll use empty strings
            try:
                plugin_filename = processed_crash['PluginFilename']
                plugin_name = processed_crash['PluginName']
                plugin_version = processed_crash['PluginVersion']
            except KeyError, x:
                self.config.logger.error(
                    'the crash is missing a required field: %s', str(x))
                return
            find_plugin_sql = ('select id from plugins '
                               'where filename = %s '
                               'and name = %s')
            try:
                plugin_id = single_value_sql(connection, find_plugin_sql,
                                             (plugin_filename, plugin_name))
            except SQLDidNotReturnSingleValue:
                insert_plugins_sql = ("insert into plugins (filename, name) "
                                      "values (%s, %s) returning id")
                plugin_id = single_value_sql(connection, insert_plugins_sql,
                                             (plugin_filename, plugin_name))
            crash_id = processed_crash['uuid']
            table_suffix = self._table_suffix_for_crash_id(crash_id)
            plugin_reports_table_name = 'plugins_reports_%s' % table_suffix
            plugins_reports_insert_sql = (
                'insert into %s '
                '    (report_id, plugin_id, date_processed, version) '
                'values '
                '    (%%s, %%s, %%s, %%s)' % plugin_reports_table_name)
            values_tuple = (report_id, plugin_id,
                            processed_crash['date_processed'], plugin_version)
            execute_no_results(connection, plugins_reports_insert_sql,
                               values_tuple)
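

# single_value_sql and execute_no_results are imported from elsewhere in
# Socorro and are not part of this example. A minimal sketch, assuming they
# are thin wrappers over a DB-API connection/cursor (the real helpers may
# differ in details such as the exceptions they raise):
class SQLDidNotReturnSingleValue(Exception):
    """Raised when a query expected to yield one value yields none."""


def single_value_sql(connection, sql, parameters=None):
    # run the query and return the first column of the first row
    cursor = connection.cursor()
    cursor.execute(sql, parameters)
    rows = cursor.fetchall()
    try:
        return rows[0][0]
    except (IndexError, TypeError):
        raise SQLDidNotReturnSingleValue(sql)


def execute_no_results(connection, sql, parameters=None):
    # run a statement purely for its side effects
    cursor = connection.cursor()
    cursor.execute(sql, parameters)

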
class MissingSymbolsRule(Rule):
    required_config = Namespace()
    required_config.add_option(
        'database_class',
        doc="the class of the database",
        default='socorro.external.postgresql.connection_context.'
        'ConnectionContext',
        from_string_converter=str_to_python_object,
        reference_value_from='resource.postgresql',
    )
    required_config.add_option(
        'transaction_executor_class',
        default="socorro.database.transaction_executor."
        "TransactionExecutorWithInfiniteBackoff",
        doc='a class that will manage transactions',
        from_string_converter=str_to_python_object,
        reference_value_from='resource.postgresql',
    )

    def __init__(self, config):
        super(MissingSymbolsRule, self).__init__(config)
        self.database = self.config.database_class(config)
        self.transaction = self.config.transaction_executor_class(
            config,
            self.database,
        )
        self.sql = (
            "INSERT INTO missing_symbols_%s"
            " (date_processed, debug_file, debug_id, code_file, code_id)"
            " VALUES (%%s, %%s, %%s, %%s, %%s)")

    def version(self):
        return '1.0'

    def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        try:
            date = processed_crash['date_processed']
            # update partition information based on date processed
            sql = self.sql % datestring_to_weekly_partition(date)
            for module in processed_crash['json_dump']['modules']:
                try:
                    # First of all, only bother if there are
                    # missing_symbols in this module.
                    # And because it's not useful if either debug_file
                    # or debug_id is empty, we filter on that here too.
                    if (module['missing_symbols'] and module['debug_file']
                            and module['debug_id']):
                        self.transaction(
                            execute_no_results,
                            sql,
                            (
                                date,
                                module['debug_file'],
                                module['debug_id'],
                                # These two use .get() because the keys
                                # were added later in history. If a key is
                                # non-existent (or existent but None), we
                                # proceed and insert NULL.
                                module.get('filename'),
                                module.get('code_id'),
                            ))
                except self.database.ProgrammingError:
                    processor_meta.processor_notes.append(
                        "WARNING: missing symbols rule failed for"
                        " %s" % raw_crash.uuid)
                except KeyError:
                    pass
        except KeyError:
            return False
        return True
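

# datestring_to_weekly_partition is defined elsewhere in Socorro. A minimal
# sketch, assuming weekly partitions are named after the Monday of the week
# in YYYYMMDD form (e.g. missing_symbols_20150601); the real helper may
# accept more input formats:
import datetime


def datestring_to_weekly_partition(date_value):
    # accept a date/datetime or an ISO 'YYYY-MM-DD...' string
    if not isinstance(date_value, (datetime.date, datetime.datetime)):
        date_value = datetime.datetime.strptime(str(date_value)[:10], '%Y-%m-%d')
    if isinstance(date_value, datetime.datetime):
        date_value = date_value.date()
    monday = date_value - datetime.timedelta(days=date_value.weekday())
    return monday.strftime('%Y%m%d')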
Beispiel #47
0
class OutOfDateClassifier(SupportClassificationRule):
    """To satisfy Bug 956879, this rule will detect classify crashes as out
    of date if the version is less than the threshold
    'firefox_out_of_date_version' found in the processor configuration"""

    required_config = Namespace()
    required_config.add_option(
        'firefox_out_of_date_version',
        doc='the version of Firefox that is considered to be old enough '
        'to warrant a warning to the user',
        default='17',
    )

    def version(self):
        return '1.0'

    def _predicate(self, raw_crash, raw_dumps, processed_crash, processor):
        try:
            return (raw_crash.ProductName == 'Firefox' and
                    normalize(raw_crash.Version) < self.out_of_date_threshold)
        except AttributeError:
            try:
                self.out_of_date_threshold = normalize(
                    self.config.firefox_out_of_date_version)
            except (AttributeError, KeyError):
                self.out_of_date_threshold = normalize(
                    processor.config.firefox_out_of_date_version)
            return self._predicate(raw_crash, raw_dumps, processed_crash,
                                   processor)

    @staticmethod
    def _normalize_windows_version(version_str):
        ver_list = version_str.split('.')[:2]

        def as_int(x):
            try:
                return int(x)
            except ValueError:
                return maxint

        # get the first integer out of the last token
        ver_list[-1] = ver_list[-1].split(' ')[0]
        ver_list_normalized = [as_int(x) for x in ver_list]
        if "Service" in version_str:
            try:
                # assume last space delimited field is service pack number
                ver_list_normalized.append(int(version_str.split(' ')[-1]))
            except ValueError:  # appears to have been a bad assumption
                ver_list_normalized.append(0)
        return tuple(ver_list_normalized)

    def _windows_action(self, raw_crash, raw_dumps, processed_crash,
                        processor):
        win_version_normalized = self._normalize_windows_version(
            processed_crash["json_dump"]["system_info"]["os_ver"])
        if win_version_normalized[:2] == (5, 0):  # Win2K
            return self._add_classification(
                processed_crash, 'firefox-no-longer-works-windows-2000', None,
                processor.config.logger)
        elif win_version_normalized < (5, 1, 3):  # WinXP SP2
            return self._add_classification(
                processed_crash,
                'firefox-no-longer-works-some-versions-windows-xp', None,
                processor.config.logger)
        return self._add_classification(processed_crash,
                                        'update-firefox-latest-version', None,
                                        processor.config.logger)

    @staticmethod
    def _normalize_osx_version(version_str):
        ver_list = version_str.split('.')[:2]

        def as_int(x):
            try:
                return int(x)
            except ValueError:
                return maxint

        return tuple(as_int(x) for x in ver_list)

    def _osx_action(self, raw_crash, raw_dumps, processed_crash, processor):
        osx_version_normalized = self._normalize_osx_version(
            processed_crash["json_dump"]["system_info"]["os_ver"])
        if (osx_version_normalized <= (10, 4)
                or processed_crash["json_dump"]["system_info"]["cpu_arch"]
                == 'ppc'):
            return self._add_classification(
                processed_crash,
                'firefox-no-longer-works-mac-os-10-4-or-powerpc', None,
                processor.config.logger)
        elif osx_version_normalized == (10, 5):
            return self._add_classification(
                processed_crash, 'firefox-no-longer-works-mac-os-x-10-5', None,
                processor.config.logger)
        return self._add_classification(processed_crash,
                                        'update-firefox-latest-version', None,
                                        processor.config.logger)

    def _action(self, raw_crash, raw_dumps, processed_crash, processor):
        crashed_version = normalize(raw_crash.Version)
        if "Win" in processed_crash["json_dump"]["system_info"]['os']:
            return self._windows_action(raw_crash, raw_dumps, processed_crash,
                                        processor)
        elif processed_crash["json_dump"]["system_info"]['os'] == "Mac OS X":
            return self._osx_action(raw_crash, raw_dumps, processed_crash,
                                    processor)
        else:
            return self._add_classification(processed_crash,
                                            'update-firefox-latest-version',
                                            None, processor.config.logger)
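

# normalize() is imported from elsewhere and not shown in this example. A
# minimal sketch, assuming it turns a dotted version string into a tuple of
# integers that compares correctly against the normalized threshold (the
# handling of non-numeric tokens here is a guess):
def normalize(version_str):
    def as_int(token):
        try:
            return int(token)
        except ValueError:
            return 0
    return tuple(as_int(token) for token in str(version_str).split('.'))

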
class FlashVersionRule(Rule):
    required_config = Namespace()
    required_config.add_option(
        'known_flash_identifiers',
        doc='A subset of the known "debug identifiers" for flash versions, '
        'associated with the version',
        default={
            '7224164B5918E29AF52365AF3EAF7A500': '10.1.51.66',
            'C6CDEFCDB58EFE5C6ECEF0C463C979F80': '10.1.51.66',
            '4EDBBD7016E8871A461CCABB7F1B16120': '10.1',
            'D1AAAB5D417861E6A5B835B01D3039550': '10.0.45.2',
            'EBD27FDBA9D9B3880550B2446902EC4A0': '10.0.45.2',
            '266780DB53C4AAC830AFF69306C5C0300': '10.0.42.34',
            'C4D637F2C8494896FBD4B3EF0319EBAC0': '10.0.42.34',
            'B19EE2363941C9582E040B99BB5E237A0': '10.0.32.18',
            '025105C956638D665850591768FB743D0': '10.0.32.18',
            '986682965B43DFA62E0A0DFFD7B7417F0': '10.0.23',
            '937DDCC422411E58EF6AD13710B0EF190': '10.0.23',
            '860692A215F054B7B9474B410ABEB5300': '10.0.22.87',
            '77CB5AC61C456B965D0B41361B3F6CEA0': '10.0.22.87',
            '38AEB67F6A0B43C6A341D7936603E84A0': '10.0.12.36',
            '776944FD51654CA2B59AB26A33D8F9B30': '10.0.12.36',
            '974873A0A6AD482F8F17A7C55F0A33390': '9.0.262.0',
            'B482D3DFD57C23B5754966F42D4CBCB60': '9.0.262.0',
            '0B03252A5C303973E320CAA6127441F80': '9.0.260.0',
            'AE71D92D2812430FA05238C52F7E20310': '9.0.246.0',
            '6761F4FA49B5F55833D66CAC0BBF8CB80': '9.0.246.0',
            '27CC04C9588E482A948FB5A87E22687B0': '9.0.159.0',
            '1C8715E734B31A2EACE3B0CFC1CF21EB0': '9.0.159.0',
            'F43004FFC4944F26AF228334F2CDA80B0': '9.0.151.0',
            '890664D4EF567481ACFD2A21E9D2A2420': '9.0.151.0',
            '8355DCF076564B6784C517FD0ECCB2F20': '9.0.124.0',
            '51C00B72112812428EFA8F4A37F683A80': '9.0.124.0',
            '9FA57B6DC7FF4CFE9A518442325E91CB0': '9.0.115.0',
            '03D99C42D7475B46D77E64D4D5386D6D0': '9.0.115.0',
            '0CFAF1611A3C4AA382D26424D609F00B0': '9.0.47.0',
            '0F3262B5501A34B963E5DF3F0386C9910': '9.0.47.0',
            'C5B5651B46B7612E118339D19A6E66360': '9.0.45.0',
            'BF6B3B51ACB255B38FCD8AA5AEB9F1030': '9.0.28.0',
            '83CF4DC03621B778E931FC713889E8F10': '9.0.16.0',
        },
        from_string_converter=ujson.loads)
    required_config.add_option(
        'flash_re',
        doc='a regular expression to match Flash file names',
        default=(r'NPSWF32_?(.*)\.dll|'
                 r'FlashPlayerPlugin_?(.*)\.exe|'
                 r'libflashplayer(.*)\.(.*)|'
                 r'Flash ?Player-?(.*)'),
        from_string_converter=re.compile)

    def version(self):
        return '1.0'

    def _get_flash_version(self, **kwargs):
        """If (we recognize this module as Flash and figure out a version):
        Returns version; else (None or '')"""
        filename = kwargs.get('filename', None)
        version = kwargs.get('version', None)
        debug_id = kwargs.get('debug_id', None)
        m = self.config.flash_re.match(filename)
        if m:
            if version:
                return version
            # we didn't get a version passed in to us;
            # try to deduce it
            groups = m.groups()
            if groups[0]:
                return groups[0].replace('_', '.')
            if groups[1]:
                return groups[1].replace('_', '.')
            if groups[2]:
                return groups[2]
            if groups[4]:
                return groups[4]
            return self.config.known_flash_identifiers.get(debug_id, None)
        return None

    def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        processed_crash.flash_version = ''
        flash_version = None

        modules = processed_crash.get('json_dump', {}).get('modules', [])
        if isinstance(modules, (tuple, list)):
            for index, a_module in enumerate(modules):
                flash_version = self._get_flash_version(**a_module)
                if flash_version:
                    break

        if flash_version:
            processed_crash.flash_version = flash_version
        else:
            processed_crash.flash_version = '[blank]'
        return True
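

# A small, self-contained illustration of how the flash_re default above maps
# module file names to versions; the sample file names are made up, and the
# module dicts passed to _get_flash_version typically carry 'filename',
# 'version' and 'debug_id' keys:
import re

flash_re = re.compile(r'NPSWF32_?(.*)\.dll|'
                      r'FlashPlayerPlugin_?(.*)\.exe|'
                      r'libflashplayer(.*)\.(.*)|'
                      r'Flash ?Player-?(.*)')

# group 1 carries the underscore-separated version -> '11.2.202.235'
assert flash_re.match('NPSWF32_11_2_202_235.dll').groups()[0] == '11_2_202_235'
# no version in the file name -> the rule falls back to
# known_flash_identifiers.get(debug_id)
assert flash_re.match('libflashplayer.so').groups()[2] == ''

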
class FetchADIFromHiveCronApp(BaseCronApp):
    """ This cron is our daily blocklist ping web logs query
        that rolls up all the browser check-ins and lets us know
        how many browsers we think were active on the internet
        for a particular day """
    app_name = 'fetch-adi-from-hive'
    app_description = 'Fetch ADI From Hive App'
    app_version = '0.1'

    required_config = Namespace()
    required_config.add_option('query',
                               default=_QUERY,
                               doc='Hive query for fetching ADI data')

    required_config.add_option('hive_host',
                               default='localhost',
                               doc='Hostname to run Hive query on')

    required_config.add_option('hive_port',
                               default=10000,
                               doc='Port to run Hive query on')

    required_config.add_option('hive_user',
                               default='socorro',
                               doc='User to connect to Hive with')

    required_config.add_option('hive_password',
                               default='ignored',
                               doc='Password to connect to Hive with')

    required_config.add_option('hive_database',
                               default='default',
                               doc='Database name to connect to Hive with')

    required_config.add_option('hive_auth_mechanism',
                               default='PLAIN',
                               doc='Auth mechanism for Hive')

    def run(self, connection, date):
        target_date = (date - datetime.timedelta(days=1)).strftime('%Y-%m-%d')

        raw_adi_logs_pathname = os.path.join(
            tempfile.gettempdir(),
            "%s.raw_adi_logs.TEMPORARY%s" % (target_date, '.txt'))
        try:
            with open(raw_adi_logs_pathname, 'w') as f:
                hive = pyhs2.connect(
                    host=self.config.hive_host,
                    port=self.config.hive_port,
                    authMechanism=self.config.hive_auth_mechanism,
                    user=self.config.hive_user,
                    password=self.config.hive_password,
                    database=self.config.hive_database)

                cur = hive.cursor()
                query = self.config.query % target_date
                cur.execute(query)
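                # each row becomes one tab-separated line whose columns must
                # line up with the raw_adi_logs columns used by copy_from()
                # below (report_date, product_name, product_os_platform, ...)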
                for row in cur:
                    f.write("\t".join(str(v) for v in row))
                    f.write("\n")

            with open(raw_adi_logs_pathname, 'r') as f:
                pgcursor = connection.cursor()
                pgcursor.copy_from(f,
                                   'raw_adi_logs',
                                   null='None',
                                   columns=[
                                       'report_date', 'product_name',
                                       'product_os_platform',
                                       'product_os_version', 'product_version',
                                       'build', 'build_channel',
                                       'product_guid', 'count'
                                   ])
        finally:
            if os.path.isfile(raw_adi_logs_pathname):
                os.remove(raw_adi_logs_pathname)
class OutOfMemoryBinaryRule(Rule):

    required_config = Namespace()
    required_config.add_option(
        'max_size_uncompressed',
        default=20 * 1024 * 1024,  # 20 MB
        doc=("Maximum number of bytes that we accept for memory info "
             "payloads as JSON."))

    def version(self):
        return '1.0'

    def _predicate(self, raw_crash, raw_dumps, processed_crash, proc_meta):
        return 'memory_report' in raw_dumps

    def _extract_memory_info(self, dump_pathname, processor_notes):
        """Extract and return the JSON data from the .json.gz memory report.
        file"""
        def error_out(error_message):
            processor_notes.append(error_message)
            return {"ERROR": error_message}

        try:
            fd = gzip.open(dump_pathname, "rb")
        except IOError as x:
            error_message = "error in gzip for %s: %r" % (dump_pathname, x)
            return error_out(error_message)

        try:
            memory_info_as_string = fd.read()
            if len(memory_info_as_string) > self.config.max_size_uncompressed:
                error_message = (
                    "Uncompressed memory info too large %d (max: %d)" % (
                        len(memory_info_as_string),
                        self.config.max_size_uncompressed,
                    ))
                return error_out(error_message)

            memory_info = ujson.loads(memory_info_as_string)
        except IOError as x:
            error_message = "error in gzip for %s: %r" % (dump_pathname, x)
            return error_out(error_message)
        except ValueError as x:
            error_message = "error in json for %s: %r" % (dump_pathname, x)
            return error_out(error_message)
        finally:
            fd.close()

        return memory_info

    def _action(self, raw_crash, raw_dumps, processed_crash, processor_meta):
        pathname = raw_dumps['memory_report']
        with temp_file_context(pathname):
            memory_report = self._extract_memory_info(
                dump_pathname=pathname,
                processor_notes=processor_meta.processor_notes)

            if isinstance(memory_report, dict) and memory_report.get('ERROR'):
                processed_crash.memory_report_error = memory_report['ERROR']
            else:
                processed_crash.memory_report = memory_report

        return True
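

# temp_file_context is imported from Socorro's utility code and not shown
# here. A minimal sketch, assuming it only guarantees that the temporary
# dump file is removed when the block exits (the real helper may also log):
import contextlib
import os


@contextlib.contextmanager
def temp_file_context(pathname):
    try:
        yield pathname
    finally:
        if os.path.isfile(pathname):
            os.remove(pathname)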
Beispiel #51
0
class ESCrashStorage(CrashStorageBase):
    """This sends processed crash reports to Elasticsearch."""

    required_config = Namespace()
    required_config.add_option(
        'transaction_executor_class',
        default="socorro.database.transaction_executor."
        "TransactionExecutorWithLimitedBackoff",
        doc='a class that will manage transactions',
        from_string_converter=class_converter,
    )
    required_config.add_option(
        'index_creator_class',
        doc='a class that can create Elasticsearch indices',
        default='socorro.external.es.index_creator.IndexCreator',
        from_string_converter=class_converter
    )

    required_config.elasticsearch = Namespace()
    required_config.elasticsearch.add_option(
        'elasticsearch_class',
        default='socorro.external.es.connection_context.ConnectionContext',
        from_string_converter=class_converter,
        reference_value_from='resource.elasticsearch',
    )

    # This cache reduces attempts to create indices, thus lowering overhead
    # each time a document is indexed.
    indices_cache = set()

    # These regexes catch field names from Elasticsearch exceptions. They
    # have been tested with Elasticsearch 1.4.
    field_name_string_error_re = re.compile(r'field=\"([\w\-.]+)\"')
    field_name_number_error_re = re.compile(
        r'\[failed to parse \[([\w\-.]+)]]'
    )

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        super(ESCrashStorage, self).__init__(
            config,
            quit_check_callback
        )

        # Ok, it's sane, so let's continue.
        self.es_context = self.config.elasticsearch.elasticsearch_class(
            config=self.config.elasticsearch
        )

        self.transaction = config.transaction_executor_class(
            config,
            self.es_context,
            quit_check_callback
        )

    #--------------------------------------------------------------------------
    def get_index_for_crash(self, crash_date):
        """Return the submission URL for a crash; based on the submission URL
        from config and the date of the crash.
        If the index name contains a datetime pattern (ex. %Y%m%d) then the
        crash_date will be parsed and appended to the index name.
        """

        index = self.config.elasticsearch.elasticsearch_index

        if not index:
            return None
        elif '%' in index:
            # Note that crash_date must be a datetime object!
            index = crash_date.strftime(index)

        return index

    #--------------------------------------------------------------------------
    def save_raw_and_processed(self, raw_crash, dumps, processed_crash,
                               crash_id):
        """This is the only write mechanism that is actually employed in normal
        usage.
        """

        crash_document = {
            'crash_id': crash_id,
            'processed_crash': processed_crash,
            'raw_crash': raw_crash
        }

        self.transaction(
            self._submit_crash_to_elasticsearch,
            crash_document=crash_document
        )

    #--------------------------------------------------------------------------
    @staticmethod
    def reconstitute_datetimes(processed_crash):
        datetime_fields = [
            'submitted_timestamp',
            'date_processed',
            'client_crash_date',
            'started_datetime',
            'startedDateTime',
            'completed_datetime',
            'completeddatetime',
        ]
        for a_key in datetime_fields:
            try:
                processed_crash[a_key] = string_to_datetime(
                    processed_crash[a_key]
                )
            except KeyError:
                # not there? we don't care
                pass

    #--------------------------------------------------------------------------
    def _submit_crash_to_elasticsearch(self, connection, crash_document):
        """Submit a crash report to elasticsearch.
        """
        # Massage the crash such that the date_processed field is formatted
        # in the fashion of our established mapping.
        self.reconstitute_datetimes(crash_document['processed_crash'])

        # Obtain the index name.
        es_index = self.get_index_for_crash(
            crash_document['processed_crash']['date_processed']
        )
        es_doctype = self.config.elasticsearch.elasticsearch_doctype
        crash_id = crash_document['crash_id']

        # Attempt to create the index; it's OK if it already exists.
        if es_index not in self.indices_cache:
            index_creator = self.config.index_creator_class(config=self.config)
            index_creator.create_socorro_index(es_index)
            # remember this index so we don't attempt to create it again
            self.indices_cache.add(es_index)

        # Submit the crash for indexing.
        # Don't retry more than 5 times. That is to avoid infinite loops in
        # case of an unhandled exception.
        for _ in range(5):
            try:
                connection.index(
                    index=es_index,
                    doc_type=es_doctype,
                    body=crash_document,
                    id=crash_id
                )
                break
            except elasticsearch.exceptions.TransportError as e:
                field_name = None

                if 'MaxBytesLengthExceededException' in e.error:
                    # This is caused by a string that is way too long for
                    # Elasticsearch.
                    matches = self.field_name_string_error_re.findall(e.error)
                    if matches:
                        field_name = matches[0]
                elif 'NumberFormatException' in e.error:
                    # This is caused by a number that is either too big for
                    # Elasticsearch or just not a number.
                    matches = self.field_name_number_error_re.findall(e.error)
                    if matches:
                        field_name = matches[0]

                if not field_name:
                    # We are unable to parse which field to remove, we cannot
                    # try to fix the document. Let it raise.
                    self.config.logger.critical(
                        'Submission to Elasticsearch failed for %s (%s)',
                        crash_id,
                        e,
                        exc_info=True
                    )
                    raise

                if field_name.endswith('.full'):
                    # Remove the `.full` at the end, that is a special mapping
                    # construct that is not part of the real field name.
                    field_name = field_name[:-len('.full')]

                # Now remove that field from the document before trying again.
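                # e.g. 'processed_crash.user_comments' splits into
                # ['processed_crash', 'user_comments']: walk down to the
                # parent mapping and delete the final key.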
                field_path = field_name.split('.')
                parent = crash_document
                for i, field in enumerate(field_path):
                    if i == len(field_path) - 1:
                        # This is the last level, so `field` contains the name
                        # of the field that we want to remove from `parent`.
                        del parent[field]
                    else:
                        parent = parent[field]

                # Add a note in the document that a field has been removed.
                if crash_document.get('removed_fields'):
                    crash_document['removed_fields'] = '{} {}'.format(
                        crash_document['removed_fields'],
                        field_name
                    )
                else:
                    crash_document['removed_fields'] = field_name
            except elasticsearch.exceptions.ElasticsearchException as e:
                self.config.logger.critical(
                    'Submission to Elasticsearch failed for %s (%s)',
                    crash_id,
                    e,
                    exc_info=True
                )
                raise
class HBaseSingleConnectionContext(RequiredConfig):
    """a configman compliant class for setup of HBase connections
    DO NOT SHARE HBASE CONNECTIONS BETWEEN THREADS
    """
    #--------------------------------------------------------------------------
    # configman parameter definition section
    # here we're setting up the minimal parameters required for connecting
    required_config = Namespace()
    required_config.add_option(
        'number_of_retries',
        doc='Max. number of retries when fetching from hbaseClient',
        default=0,
        reference_value_from='resource.hbase')
    required_config.add_option(
        'hbase_host',
        doc='Host to HBase server',
        default='localhost',
        reference_value_from='resource.hbase',
    )
    required_config.add_option(
        'hbase_port',
        doc='Port to HBase server',
        default=9090,
        reference_value_from='resource.hbase',
    )
    required_config.add_option(
        'hbase_timeout',
        doc='timeout in milliseconds for an HBase connection',
        default=5000,
        reference_value_from='resource.hbase',
    )
    required_config.add_option(
        'temporary_file_system_storage_path',
        doc='a local filesystem path where dumps are stored temporarily '
        'during processing',
        default='/home/socorro/temp',
        reference_value_from='resource.hbase',
    )
    required_config.add_option(
        'dump_file_suffix',
        doc='the suffix used to identify a dump file (for use in temp files)',
        default='.dump',
        reference_value_from='resource.hbase',
    )

    #--------------------------------------------------------------------------
    def __init__(self, config, local_config=None):
        """Initialize the parts needed to start making database connections

        parameters:
            config - the complete config for the app.  If a real app, this
                     would be where a logger or other resources could be
                     found.
            local_config - this is the namespace within the complete config
                           where the actual database parameters are found"""
        super(HBaseSingleConnectionContext, self).__init__()
        self.config = config
        if local_config is None:
            local_config = config

        dummy_connection = hbase_client.HBaseConnectionForCrashReports(
            local_config.hbase_host,
            local_config.hbase_port,
            local_config.hbase_timeout,
            logger=self.config.logger)
        dummy_connection.close()
        self.operational_exceptions = \
            dummy_connection.hbaseThriftExceptions
        self.operational_exceptions += \
            (hbase_client.NoConnectionException,)
        self.conditional_exceptions = ()

    #--------------------------------------------------------------------------
    def connection(self, name_unused=None):
        """return a new database connection

        parameters:
            name_unused - optional named connections.  Used by the
                          derived class
        """
        #self.config.logger.debug('creating new HBase connection')
        return hbase_client.HBaseConnectionForCrashReports(
            self.config.hbase_host,
            self.config.hbase_port,
            self.config.hbase_timeout,
            logger=self.config.logger)

    #--------------------------------------------------------------------------
    @contextlib.contextmanager
    def __call__(self, name=None):
        """returns a database connection wrapped in a contextmanager.

        The context manager will assure that the connection is closed but will
        not try to commit or rollback lingering transactions.

        parameters:
            name - an optional name for the database connection"""
        conn = self.connection(name)
        try:
            #self.config.logger.debug('connection HBase acquired')
            yield conn
        finally:
            self.close_connection(conn)

    #--------------------------------------------------------------------------
    def close_connection(self, connection, force=False):
        """close the connection passed in.

        This function exists to allow derived classes to override the closing
        behavior.

        parameters:
            connection - the database connection object
            force - unused boolean to force closure; used in derived classes
        """
        #self.config.logger.debug('connection HBase closed')
        connection.close()

    #--------------------------------------------------------------------------
    def close(self):
        """close any pooled or cached connections.  Since this base class
        object does no caching, there is no implementation required.  Derived
        classes may implement it."""
        pass

    #--------------------------------------------------------------------------
    def is_operational_exception(self, msg):
        """return True if a conditional exception is actually an operational
        error. Return False if it's a genuine error that should probably be
        raised and propagated up.

        Some conditional exceptions might actually be some form of
        operational exception "labelled" wrong by the psycopg2 code error
        handler.
        """

        return False

    #--------------------------------------------------------------------------
    def force_reconnect(self):
        pass
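

# A hedged usage sketch for the context-manager interface above; the config
# object and its hbase_* values would normally be assembled by configman from
# HBaseSingleConnectionContext.required_config:
#
#     hbase_context = HBaseSingleConnectionContext(config)
#     with hbase_context('crash-fetch') as connection:
#         # use the hbase_client connection here; it is closed on exit,
#         # but no commit or rollback is attempted
#         ...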
Beispiel #53
0
class DependencySecurityCheckCronApp(BaseCronApp):
    """Configuration values used by this app:

    crontabber.class-DependencySecurityCheckCronApp.nsp_path
        Path to the nsp binary for checking Node dependencies.
    crontabber.class-DependencySecurityCheckCronApp.safety_path
        Path to the PyUp Safety binary for checking Python dependencies.
    crontabber.class-DependencySecurityCheckCronApp.safety_api_key
        Optional API key to pass to Safety.
    crontabber.class-DependencySecurityCheckCronApp.package_json_path
        Path to the package.json file to run nsp against.
    secrets.sentry.dsn
        If specified, vulnerabilities will be reported to Sentry instead
        of logged to the console.

    """
    app_name = 'dependency-security-check'
    app_description = (
        'Runs third-party tools that check for known security vulnerabilities in Socorro\'s '
        'dependencies.')
    app_version = '0.1'

    required_config = Namespace()
    required_config.add_option(
        'nsp_path',
        doc='Path to the nsp binary',
    )
    required_config.add_option(
        'safety_path',
        doc='Path to the PyUp safety binary',
    )
    required_config.add_option(
        'safety_api_key',
        doc='API key for Safety to use latest Pyup vulnerability database',
    )
    required_config.add_option(
        'package_json_path',
        doc='Path to the package.json file to run nsp against',
    )

    def run(self):
        self.validate_options()

        vulnerabilities = (
            self.get_python_vulnerabilities() +
            self.get_javascript_vulnerabilities()
        )
        if vulnerabilities:
            try:
                dsn = self.config.sentry.dsn
            except KeyError:
                dsn = None

            if dsn:
                self.alert_sentry(dsn, vulnerabilities)
            else:
                self.alert_log(vulnerabilities)

    def validate_options(self):
        # Validate file path options
        for option in ('nsp_path', 'safety_path', 'package_json_path'):
            value = self.config.get(option)
            if not value:
                raise OptionError('Required option "%s" is empty' % option)
            elif not os.path.exists(value):
                raise OptionError(
                    'Option "%s" points to a nonexistant file (%s)' %
                    (option, value))
            elif not os.path.isfile(value):
                raise OptionError('Option "%s" does not point to a file (%s)' %
                                  (option, value))

    def alert_sentry(self, dsn, vulnerabilities):
        client = raven_client.get_client(dsn)
        client.context.activate()
        client.context.merge({
            'extra': {
                'data': {vuln.key: vuln.summary
                         for vuln in vulnerabilities},
            },
        })
        client.captureMessage('Dependency security check failed')

    def alert_log(self, vulnerabilities):
        self.config.logger.error('Vulnerabilities found in dependencies!')
        for vuln in vulnerabilities:
            self.config.logger.error('%s: %s' % (vuln.key, vuln.summary))

    def get_python_vulnerabilities(self):
        """Check Python dependencies via Pyup's safety command.

        :returns list(Vulnerability):
        :raises DependencySecurityCheckFailed:
        """
        # Safety checks what's installed in the current virtualenv, so no need
        # for any paths.
        cmd = [self.config.safety_path, 'check', '--json']
        if self.config.get('safety_api_key'):
            cmd += ['--key', self.config.safety_api_key]

        process = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        output, error_output = process.communicate()

        if process.returncode == 0:
            return []
        elif process.returncode == 255:
            try:
                results = json.loads(output)
                return [
                    Vulnerability(
                        type='python',
                        dependency=result[0],
                        installed_version=result[2],
                        affected_versions=result[1],
                        description=result[3],
                    ) for result in results
                ]
            except (ValueError, IndexError) as err:
                raise DependencySecurityCheckFailed(
                    'Could not parse pyup safety output',
                    err,
                    output,
                )

        raise DependencySecurityCheckFailed(error_output)

    def get_javascript_vulnerabilities(self):
        """Check JavaScript dependencies via the nsp command.

        :returns list(Vulnerability):
        :raises DependencySecurityCheckFailed:
        """
        process = Popen(
            [
                self.config.nsp_path,
                'check',
                '--reporter=json',
            ],
            stdin=PIPE,
            stdout=PIPE,
            stderr=PIPE,
            cwd=dirname(self.config.package_json_path),
        )
        output, error_output = process.communicate()
        if process.returncode == 0:
            return []
        elif process.returncode == 1:
            try:
                results = json.loads(output)
                return [
                    Vulnerability(
                        type='javascript',
                        dependency=result['module'],
                        installed_version=result['version'],
                        affected_versions=result['vulnerable_versions'],
                        description=result['advisory'],
                    ) for result in results
                ]
            except (ValueError, KeyError) as err:
                raise DependencySecurityCheckFailed(
                    'Could not parse nsp output', err, output)

        raise DependencySecurityCheckFailed(error_output)
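

# The Vulnerability type used above is defined elsewhere in Socorro. A minimal
# sketch that is consistent with how it is constructed here and with the .key
# and .summary attributes read by alert_sentry()/alert_log(); the exact
# formatting of key and summary is an assumption:
from collections import namedtuple


class Vulnerability(namedtuple('Vulnerability',
                               'type dependency installed_version '
                               'affected_versions description')):
    @property
    def key(self):
        # a stable identifier, e.g. 'python-requests'
        return '%s-%s' % (self.type, self.dependency)

    @property
    def summary(self):
        return '%s %s (affects %s): %s' % (
            self.dependency,
            self.installed_version,
            self.affected_versions,
            self.description,
        )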
Beispiel #54
0
class ESCrashStorage(CrashStorageBase):
    """This sends raw and processed crash reports to Elasticsearch."""

    required_config = Namespace()
    required_config.elasticsearch = Namespace()
    required_config.elasticsearch.add_option(
        'elasticsearch_class',
        default='socorro.external.es.connection_context.ConnectionContext',
        from_string_converter=class_converter,
        reference_value_from='resource.elasticsearch',
    )

    # These regexes catch field names from Elasticsearch exceptions. They
    # have been tested with Elasticsearch 1.4.
    field_name_string_error_re = re.compile(r'field=\"([\w\-.]+)\"')
    field_name_number_error_re = re.compile(r'\[failed to parse \[([\w\-.]+)]]')

    def __init__(self, config, namespace='', quit_check_callback=None):
        super().__init__(config, namespace=namespace, quit_check_callback=quit_check_callback)

        self.es_context = self.config.elasticsearch.elasticsearch_class(
            config=self.config.elasticsearch
        )
        self.metrics = markus.get_metrics(namespace)

    def get_index_for_crash(self, crash_date):
        """Return the submission URL for a crash; based on the submission URL
        from config and the date of the crash.
        If the index name contains a datetime pattern (ex. %Y%m%d) then the
        crash_date will be parsed and appended to the index name.

        """
        index = self.config.elasticsearch.elasticsearch_index

        if not index:
            return None
        elif '%' in index:
            # Note that crash_date must be a datetime object!
            index = crash_date.strftime(index)

        return index

    def save_raw_and_processed(self, raw_crash, dumps, processed_crash, crash_id):
        """Save raw and processed crash data to Elasticsearch"""

        # Massage the crash such that the date_processed field is formatted
        # in the fashion of our established mapping.
        reconstitute_datetimes(processed_crash)

        # Remove bad keys from the raw crash--these keys are essentially
        # user-provided and can contain junk data
        remove_bad_keys(raw_crash)

        # Truncate values that are too long
        truncate_keyword_field_values(FIELDS, raw_crash)
        truncate_keyword_field_values(FIELDS, processed_crash)

        # Convert pseudo-boolean values to boolean values
        convert_booleans(FIELDS, raw_crash)
        convert_booleans(FIELDS, processed_crash)

        # Capture crash data size metrics--do this only after we've cleaned up
        # the crash data
        self.capture_crash_metrics(raw_crash, processed_crash)

        crash_document = {
            'crash_id': crash_id,
            'processed_crash': processed_crash,
            'raw_crash': raw_crash
        }

        self._submit_crash_to_elasticsearch(crash_document)

    def capture_crash_metrics(self, raw_crash, processed_crash):
        """Capture metrics about crash data being saved to Elasticsearch"""
        try:
            self.metrics.histogram(
                'raw_crash_size',
                value=len(json.dumps(raw_crash, cls=JsonDTEncoder))
            )
        except Exception:
            # NOTE(willkg): An error here shouldn't screw up saving data. Log it so we can fix it
            # later.
            self.logger.exception('something went wrong when capturing raw_crash_size')

        try:
            self.metrics.histogram(
                'processed_crash_size',
                value=len(json.dumps(processed_crash, cls=JsonDTEncoder))
            )
        except Exception:
            # NOTE(willkg): An error here shouldn't screw up saving data. Log it so we can fix it
            # later.
            self.logger.exception('something went wrong when capturing processed_crash_size')

    def _index_crash(self, connection, es_index, es_doctype, crash_document, crash_id):
        try:
            start_time = time.time()
            connection.index(
                index=es_index,
                doc_type=es_doctype,
                body=crash_document,
                id=crash_id
            )
            index_outcome = 'successful'
        except Exception:
            index_outcome = 'failed'
            raise
        finally:
            elapsed_time = time.time() - start_time
            self.metrics.histogram(
                'index',
                value=elapsed_time * 1000.0,
                tags=['outcome:' + index_outcome]
            )

    def _submit_crash_to_elasticsearch(self, crash_document):
        """Submit a crash report to elasticsearch"""
        index_name = self.get_index_for_crash(crash_document['processed_crash']['date_processed'])
        es_doctype = self.config.elasticsearch.elasticsearch_doctype
        crash_id = crash_document['crash_id']

        # Attempt to create the index; it's OK if it already exists.
        self.es_context.create_index(index_name)

        # Submit the crash for indexing.
        # Don't retry more than 5 times. That is to avoid infinite loops in
        # case of an unhandled exception.
        for attempt in range(5):
            try:
                with self.es_context() as conn:
                    return self._index_crash(conn, index_name, es_doctype, crash_document, crash_id)

            except elasticsearch.exceptions.ConnectionError:
                # If this is a connection error, sleep a second and then try again
                time.sleep(1.0)

            except elasticsearch.exceptions.TransportError as e:
                # If this is a TransportError, we try to figure out what the error
                # is and fix the document and try again
                field_name = None

                if 'MaxBytesLengthExceededException' in e.error:
                    # This is caused by a string that is way too long for
                    # Elasticsearch.
                    matches = self.field_name_string_error_re.findall(e.error)
                    if matches:
                        field_name = matches[0]
                elif 'NumberFormatException' in e.error:
                    # This is caused by a number that is either too big for
                    # Elasticsearch or just not a number.
                    matches = self.field_name_number_error_re.findall(e.error)
                    if matches:
                        field_name = matches[0]

                if not field_name:
                    # We are unable to parse which field to remove, we cannot
                    # try to fix the document. Let it raise.
                    self.logger.critical(
                        'Submission to Elasticsearch failed for %s (%s)',
                        crash_id,
                        e,
                        exc_info=True
                    )
                    raise

                if field_name.endswith('.full'):
                    # Remove the `.full` at the end, that is a special mapping
                    # construct that is not part of the real field name.
                    field_name = field_name[:-len('.full')]

                # Now remove that field from the document before trying again.
                field_path = field_name.split('.')
                parent = crash_document
                for i, field in enumerate(field_path):
                    if i == len(field_path) - 1:
                        # This is the last level, so `field` contains the name
                        # of the field that we want to remove from `parent`.
                        del parent[field]
                    else:
                        parent = parent[field]

                # Add a note in the document that a field has been removed.
                if crash_document.get('removed_fields'):
                    crash_document['removed_fields'] = '{} {}'.format(
                        crash_document['removed_fields'],
                        field_name
                    )
                else:
                    crash_document['removed_fields'] = field_name

            except elasticsearch.exceptions.ElasticsearchException as exc:
                self.logger.critical(
                    'Submission to Elasticsearch failed for %s (%s)',
                    crash_id,
                    exc,
                    exc_info=True
                )
                raise
Beispiel #55
0
class B(A):
    foo = 'b'
    required_config = Namespace()
    required_config.add_option('z', default=2)
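

# class A is not part of this snippet. A companion sketch that is consistent
# with the PolyCrashStorage test further below (it expects foo == 'a' and
# overrides a 'y' option on an A-based storage namespace); the option names
# and defaults here are assumptions. configman aggregates required_config
# across the inheritance chain, so B exposes A's options plus 'z':
class A(CrashStorageBase):
    foo = 'a'
    required_config = Namespace()
    required_config.add_option('x', default=1)
    required_config.add_option('y', default=2)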
Beispiel #56
0
class ReprocessCrashlistApp(App):
    app_name = 'reprocess_crashlist'
    app_version = '1.0'
    app_description = __doc__

    required_config = Namespace()
    required_config.namespace('reprocesscrashlist')
    required_config.reprocesscrashlist.add_option(
        'host',
        doc='host to connect to for RabbitMQ',
        default='localhost',
        reference_value_from='resource.rabbitmq',
    )
    required_config.reprocesscrashlist.add_option(
        'port',
        doc='port to connect to for RabbitMQ',
        default=5672,
        reference_value_from='resource.rabbitmq',
    )
    required_config.reprocesscrashlist.add_option(
        'rabbitmq_user',
        doc='user to connect to for RabbitMQ',
        default='guest',
        reference_value_from='secrets.rabbitmq',
    )
    required_config.reprocesscrashlist.add_option(
        'rabbitmq_password',
        doc="the user's RabbitMQ password",
        default='guest',
        reference_value_from='secrets.rabbitmq',
        secret=True,
    )
    required_config.reprocesscrashlist.add_option(
        name='virtual_host',
        doc='the name of the RabbitMQ virtual host',
        default='/',
        reference_value_from='resource.rabbitmq',
    )
    required_config.reprocesscrashlist.add_option(
        'crashes',
        doc='File containing crash UUIDs, one per line',
        default='crashlist.txt')

    def connect(self):
        logging.debug("connecting to rabbit")
        config = self.config.reprocesscrashlist
        try:
            connection = pika.BlockingConnection(
                pika.ConnectionParameters(
                    host=config.host,
                    port=config.port,
                    virtual_host=config.virtual_host,
                    credentials=pika.credentials.PlainCredentials(
                        config.rabbitmq_user, config.rabbitmq_password)))
        except Exception:
            logging.error("Failed to connect")
            raise
        self.connection = connection

    def main(self):
        self.connect()
        channel = self.connection.channel()

        channel.queue_declare(queue='socorro.reprocessing', durable=True)

        with open(self.config.reprocesscrashlist.crashes, 'r') as file:
            for uuid in file.read().splitlines():
                channel.basic_publish(
                    exchange='',
                    routing_key="socorro.reprocessing",
                    body=uuid,
                    properties=pika.BasicProperties(delivery_mode=2))
                logging.debug('submitted %s' % uuid)

        self.connection.close()
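

# A hypothetical crashlist.txt, one crash UUID per line; the values below are
# made up and only illustrate the expected shape:
#
#     1bead314-4029-448f-a5a1-b26ba2160908
#     7e3ef6b0-54d5-47bd-a7a3-0a2302160908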
Beispiel #57
0
    def test_poly_crash_storage(self):
        n = Namespace()
        n.add_option(
            'storage',
            default=PolyCrashStorage,
        )
        n.add_option(
            'logger',
            default=mock.Mock(),
        )
        value = {
            'storage_classes':
            'socorro.unittest.external.test_crashstorage_base.A,'
            'socorro.unittest.external.test_crashstorage_base.A,'
            'socorro.unittest.external.test_crashstorage_base.B',
            'storage1.y':
            37,
        }
        cm = ConfigurationManager(n, values_source_list=[value])
        with cm.context() as config:
            self.assertEqual(config.storage0.crashstorage_class.foo, 'a')
            self.assertEqual(config.storage1.crashstorage_class.foo, 'a')
            self.assertEqual(config.storage1.y, 37)
            self.assertEqual(config.storage2.crashstorage_class.foo, 'b')

            poly_store = config.storage(config)
            l = len(poly_store.storage_namespaces)
            self.assertEqual(
                l, 3, 'expected poly_store to have length of 3, '
                'but %d was found instead' % l)
            self.assertEqual(poly_store.storage_namespaces[0], 'storage0')
            self.assertEqual(poly_store.storage_namespaces[1], 'storage1')
            self.assertEqual(poly_store.storage_namespaces[2], 'storage2')
            l = len(poly_store.stores)
            self.assertEqual(
                l, 3, 'expected poly_store.stores to have length of 3, '
                'but %d was found instead' % l)
            self.assertEqual(poly_store.stores.storage0.foo, 'a')
            self.assertEqual(poly_store.stores.storage1.foo, 'a')
            self.assertEqual(poly_store.stores.storage2.foo, 'b')

            raw_crash = {'ooid': ''}
            dump = '12345'
            processed_crash = {'ooid': '', 'product': 17}
            for v in poly_store.stores.itervalues():
                v.save_raw_crash = Mock()
                v.save_processed = Mock()
                v.close = Mock()

            poly_store.save_raw_crash(raw_crash, dump, '')
            for v in poly_store.stores.itervalues():
                v.save_raw_crash.assert_called_once_with(raw_crash, dump, '')

            poly_store.save_processed(processed_crash)
            for v in poly_store.stores.itervalues():
                v.save_processed.assert_called_once_with(processed_crash)

            poly_store.save_raw_and_processed(raw_crash, dump, processed_crash,
                                              'n')
            for v in poly_store.stores.itervalues():
                v.save_raw_crash.assert_called_with(raw_crash, dump, 'n')
                v.save_processed.assert_called_with(processed_crash)

            raw_crash = {'ooid': 'oaeu'}
            dump = '5432'
            processed_crash = {'ooid': 'aoeu', 'product': 33}

            poly_store.stores['storage1'].save_raw_crash = Mock()
            poly_store.stores['storage1'].save_raw_crash.side_effect = \
                Exception('this is messed up')
            poly_store.stores['storage2'].save_processed = Mock()
            poly_store.stores['storage2'].save_processed.side_effect = \
                Exception('this is messed up')

            self.assertRaises(PolyStorageError, poly_store.save_raw_crash,
                              raw_crash, dump, '')
            for v in poly_store.stores.itervalues():
                v.save_raw_crash.assert_called_with(raw_crash, dump, '')

            self.assertRaises(PolyStorageError, poly_store.save_processed,
                              processed_crash)
            for v in poly_store.stores.itervalues():
                v.save_processed.assert_called_with(processed_crash)

            poly_store.stores['storage2'].close.side_effect = \
                Exception
            self.assertRaises(PolyStorageError, poly_store.close)
            for v in poly_store.stores.itervalues():
                v.close.assert_called_with()
Beispiel #58
0
class MiddlewareApp(App):
    app_name = 'middleware'
    app_version = '3.1'
    app_description = __doc__

    #--------------------------------------------------------------------------
    # in this section, define any configuration requirements
    required_config = Namespace()

    #--------------------------------------------------------------------------
    # implementations namespace
    #     the namespace is for external implementations of the services
    #-------------------------------------------------------------------------
    required_config.namespace('implementations')
    required_config.implementations.add_option(
        'implementation_list',
        doc='list of packages for service implementations',
        default='psql:socorro.external.postgresql, '
        'hbase:socorro.external.hb, '
        'es:socorro.external.elasticsearch, '
        'fs:socorro.external.fs, '
        'http:socorro.external.http, '
        'rabbitmq:socorro.external.rabbitmq',
        from_string_converter=items_list_decode,
        to_string_converter=items_list_encode)

    required_config.implementations.add_option(
        'service_overrides',
        doc='comma separated list of class overrides, e.g `Crashes: hbase`',
        default='CrashData: fs, '
        'Correlations: http, '
        'CorrelationsSignatures: http, '
        'SuperSearch: es, '
        'Priorityjobs: rabbitmq, '
        'Query: es',
        from_string_converter=items_list_decode,
        to_string_converter=items_list_encode)
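
    # items_list_decode / items_list_encode are converters defined elsewhere.
    # A hedged sketch of the assumed decoding of values like the defaults
    # above ('prefix: package' pairs separated by commas):
    #
    #     def items_list_decode(text):
    #         return [
    #             [part.strip() for part in item.split(':', 1)]
    #             for item in text.split(',')
    #             if item.strip()
    #         ]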

    #--------------------------------------------------------------------------
    # database namespace
    #     the namespace is for external implementations of the services
    #-------------------------------------------------------------------------
    required_config.namespace('database')
    required_config.database.add_option(
        'database_class',
        default='socorro.external.postgresql.connection_context.'
        'ConnectionContext',
        from_string_converter=class_converter)

    #--------------------------------------------------------------------------
    # hbase namespace
    #     the namespace is for external implementations of the services
    #-------------------------------------------------------------------------
    required_config.namespace('hbase')
    required_config.hbase.add_option(
        'hbase_class',
        default='socorro.external.hb.crashstorage.HBaseCrashStorage',
        from_string_converter=class_converter)

    #--------------------------------------------------------------------------
    # filesystem namespace
    #     the namespace is for external implementations of the services
    #-------------------------------------------------------------------------
    required_config.namespace('filesystem')
    required_config.filesystem.add_option(
        'filesystem_class',
        default='socorro.external.fs.crashstorage.FSLegacyRadixTreeStorage',
        from_string_converter=class_converter)

    #--------------------------------------------------------------------------
    # rabbitmq namespace
    #     the namespace is for external implementations of the services
    #-------------------------------------------------------------------------
    required_config.namespace('rabbitmq')
    required_config.rabbitmq.add_option(
        'rabbitmq_class',
        default='socorro.external.rabbitmq.connection_context.'
        'ConnectionContext',
        from_string_converter=class_converter)

    #--------------------------------------------------------------------------
    # webapi namespace
    #     all the config options that used to belong to webapiconfig.py
    #--------------------------------------------------------------------------
    required_config.namespace('webapi')
    required_config.webapi.add_option(
        'elasticSearchHostname',
        default='localhost',
        doc='String containing the URI of the Elastic Search instance.',
        reference_value_from='resource.elasticsearch',
    )
    required_config.webapi.add_option(
        'elasticSearchPort',
        default='9200',
        doc='String containing the port on which to call the Elastic '
        'Search instance.',
        reference_value_from='resource.elasticsearch',
    )
    required_config.webapi.add_option(
        'elasticsearch_urls',
        default=['http://localhost:9200'],
        doc='the urls to the elasticsearch instances',
        from_string_converter=string_to_list,
        reference_value_from='resource.elasticsearch',
    )
    required_config.webapi.add_option(
        'elasticsearch_default_index',
        default='socorro',
        doc='the default index used to store data',
        reference_value_from='resource.elasticsearch',
    )
    required_config.webapi.add_option(
        'elasticsearch_index',
        default='socorro%Y%W',
        doc='an index format to pull crashes from elasticsearch '
        "(use datetime's strftime format to have "
        'daily, weekly or monthly indexes)',
        reference_value_from='resource.elasticsearch',
    )
    required_config.webapi.add_option(
        'elasticsearch_doctype',
        default='crash_reports',
        doc='the default doctype to use in elasticsearch',
        reference_value_from='resource.elasticsearch',
    )
    required_config.webapi.add_option(
        'elasticsearch_timeout',
        default=30,
        doc='the time in seconds before a query to elasticsearch fails',
        reference_value_from='resource.elasticsearch',
    )
    required_config.webapi.add_option(
        'elasticsearch_timeout_extended',
        default=120,
        doc='the time in seconds before a query to elasticsearch fails in '
        'restricted sections',
        reference_value_from='resource.elasticsearch',
    )
    required_config.webapi.add_option(
        'facets_max_number',
        default=50,
        doc='the maximum number of results a facet will return in search')
    required_config.webapi.add_option(
        'searchMaxNumberOfDistinctSignatures',
        default=1000,
        doc='Integer containing the maximum allowed number of distinct '
        'signatures the system should retrieve. Used mainly for '
        'performance in Elasticsearch')
    required_config.webapi.add_option(
        'search_default_date_range',
        default=7,  # in days
        doc='the default date range for searches, in days')
    required_config.webapi.add_option(
        'search_maximum_date_range',
        default=365,  # in days
        doc='the maximum date range for searches, in days')
    required_config.webapi.add_option(
        'platforms',
        default=[
            {
                "id": "windows",
                "name": "Windows NT"
            },
            {
                "id": "mac",
                "name": "Mac OS X"
            },
            {
                "id": "linux",
                "name": "Linux"
            },
        ],
        doc='Array associating OS ids with full names.',
        from_string_converter=json.loads)
    required_config.webapi.add_option(
        'non_release_channels',
        default=['beta', 'aurora', 'nightly'],
        doc='List of channels, excluding the `release` one.',
        from_string_converter=string_to_list)
    required_config.webapi.add_option(
        'restricted_channels',
        default=['beta'],
        doc='List of channels to restrict based on build ids.',
        from_string_converter=string_to_list)

    #--------------------------------------------------------------------------
    # web_server namespace
    #     the namespace is for config parameters for the web server
    #--------------------------------------------------------------------------
    required_config.namespace('web_server')
    required_config.web_server.add_option(
        'wsgi_server_class',
        doc='a class implementing a wsgi web server',
        default='socorro.webapi.servers.CherryPy',
        from_string_converter=class_converter)

    #--------------------------------------------------------------------------
    # http namespace
    #     the namespace is for config parameters for the http modules
    #--------------------------------------------------------------------------
    required_config.namespace('http')
    required_config.http.namespace('correlations')
    required_config.http.correlations.add_option(
        'base_url',
        doc='Base URL where correlations text files are',
        default='https://crash-analysis.mozilla.com/crash_analysis/',
    )
    required_config.http.correlations.add_option(
        'save_download',
        doc='Whether files downloaded for correlations should be '
        'temporarily stored on disk',
        default=True,
    )
    required_config.http.correlations.add_option(
        'save_seconds',
        doc='Number of seconds that the downloaded .txt file is stored '
        'in a temporary place',
        default=60 * 10,
    )
    required_config.http.correlations.add_option(
        'save_root',
        doc='Directory where the temporary downloads are stored '
        '(if left empty, the system\'s tmp directory is used)',
        default='',
    )

    #--------------------------------------------------------------------------
    # sentry namespace
    #     the namespace is for Sentry error capturing with Raven
    #--------------------------------------------------------------------------
    required_config.namespace('sentry')
    required_config.sentry.add_option(
        'dsn',
        doc='DSN for Sentry via raven',
        default='',
        reference_value_from='secrets.sentry',
    )
    #--------------------------------------------------------------------------
    # laglog namespace
    #     the namespace for the replica lag log
    #--------------------------------------------------------------------------
    required_config.namespace('laglog')
    required_config.laglog.add_option(
        'max_bytes_warning',
        default=16 * 1024 * 1024,
        doc="Number of bytes that warrents a warning")
    required_config.laglog.add_option(
        'max_bytes_critical',
        default=32 * 1024 * 1024,
        doc="Number of bytes that warrents a critial")

    # because the socorro.webapi.servers classes bring up their own default
    # configurations like port number, the only way to override the default
    # is like this:
    from socorro.webapi.servers import StandAloneServer
    StandAloneServer.required_config.port.set_default(8883, force=True)

    #--------------------------------------------------------------------------
    def main(self):
        # Apache mod_wsgi requires a module-level name 'application'
        global application

        # 1. turn these class names into real references to classes
        def lookup(file_and_class):
            file_name, class_name = file_and_class.rsplit('.', 1)
            overrides = dict(self.config.implementations.service_overrides)
            _list = self.config.implementations.implementation_list
            for prefix, base_module_path in _list:
                if class_name in overrides:
                    if prefix != overrides[class_name]:
                        continue
                try:
                    module = __import__(
                        '%s.%s' % (base_module_path, file_name), globals(),
                        locals(), [class_name])
                except ImportError:
                    raise ImportError(
                        "Unable to import %s.%s.%s" %
                        (base_module_path, file_name, class_name))
                return getattr(module, class_name)
            raise ImplementationConfigurationError(file_and_class)

        # This mapping will hold the collection of url/service
        # implementations.  It is populated in the for loop a few lines
        # below.  It is used in the 'wrap' function so that all services
        # have a place to look up dependent services.

        all_services_mapping = {}

        # 2. wrap each service class with the ImplementationWrapper class
        def wrap(cls, file_and_class):
            return type(
                cls.__name__,
                (ImplementationWrapper, ),
                {
                    'cls': cls,
                    'file_and_class': file_and_class,
                    # give lookup access of dependent services to all services
                    'all_services': all_services_mapping,
                })

        services_list = []
        # populate the 'services_list' with the tuples that will define the
        # urls and services offered by the middleware.
        for url, impl_class in SERVICES_LIST:
            impl_instance = lookup(impl_class)
            wrapped_impl = wrap(impl_instance, impl_class)
            services_list.append((url, wrapped_impl))
            all_services_mapping[impl_instance.__name__] = wrapped_impl

        self.web_server = self.config.web_server.wsgi_server_class(
            self.config,  # needs the whole config not the local namespace
            services_list)

        # for modwsgi the 'run' method returns the wsgi function that Apache
        # will use.  For other webservers, the 'run' method actually starts
        # the standalone web server.
        application = self.web_server.run()
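The interesting machinery in this example is the pairing of implementation_list (prefix -> package) with service_overrides (service class -> prefix): lookup() walks the package list and skips any backend that an override pins the class away from. A stripped-down version of that resolution logic, kept outside the App class (the helper name and the example package names are illustrative, not taken from Socorro):

def resolve_service(file_and_class, implementation_list, service_overrides):
    # implementation_list: list of (prefix, base_module_path) pairs
    # service_overrides:   dict mapping class name -> required prefix
    file_name, class_name = file_and_class.rsplit('.', 1)
    for prefix, base_module_path in implementation_list:
        if class_name in service_overrides and \
                prefix != service_overrides[class_name]:
            # this backend is not the one pinned for this service
            continue
        module = __import__(
            '%s.%s' % (base_module_path, file_name),
            globals(), locals(), [class_name]
        )
        return getattr(module, class_name)
    raise LookupError('no implementation found for %s' % file_and_class)

# e.g. resolve_service(
#     'crash_data.CrashData',
#     [('psql', 'socorro.external.postgresql'), ('fs', 'socorro.external.fs')],
#     {'CrashData': 'fs'},
# ) would skip the psql entry and import socorro.external.fs.crash_data.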
Example #59
def config_from_configman():
    definition_source = Namespace()
    definition_source.namespace('logging')
    definition_source.logging = socorro_app.App.required_config.logging

    definition_source.namespace('metricscfg')
    definition_source.metricscfg = socorro_app.App.required_config.metricscfg

    definition_source.namespace('elasticsearch')
    definition_source.elasticsearch.add_option(
        'elasticsearch_class',
        default=ElasticsearchConfig,
    )
    definition_source.namespace('database')
    definition_source.database.add_option(
        'database_storage_class',
        default=PostgreSQLCrashStorage,
    )
    definition_source.namespace('queuing')
    definition_source.queuing.add_option(
        'rabbitmq_reprocessing_class',
        default=ReprocessingOneRabbitMQCrashStore,
    )
    definition_source.namespace('priority')
    definition_source.priority.add_option(
        'rabbitmq_priority_class',
        default=PriorityjobRabbitMQCrashStore,
    )
    definition_source.namespace('data')
    definition_source.data.add_option(
        'crash_data_class',
        default=socorro.external.boto.crash_data.SimplifiedCrashData,
    )
    config = configuration(definition_source=definition_source,
                           values_source_list=[
                               settings.SOCORRO_IMPLEMENTATIONS_CONFIG,
                           ])
    # The ReprocessingOneRabbitMQCrashStore crash storage needs to have
    # a "logger" in the config object. To avoid having to use the
    # logger set up by configman as an aggregate, we just use the
    # same logger as we have here in the webapp.
    config.queuing.logger = logger
    config.priority.logger = logger
    config.data.logger = logger
    return config
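config_from_configman() stitches several component namespaces into one configman definition and resolves it against settings.SOCORRO_IMPLEMENTATIONS_CONFIG. A hedged sketch of how a caller might then use the result; the attribute names follow the namespaces defined above, but the instantiation pattern (passing each sub-namespace as the component's config) is an assumption, not shown in the snippet:

# a minimal sketch, assuming the configman result behaves like a DotDict
# and each *_class option resolved to a class that takes its sub-config
config = config_from_configman()

# the 'data' namespace carries crash_data_class plus the webapp logger
# patched in at the end of config_from_configman()
crash_data_api = config.data.crash_data_class(config.data)

# the 'priority' namespace can be used the same way to enqueue a
# priority-processing job for a given crash id
priority_api = config.priority.rabbitmq_priority_class(config.priority)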
Example #60
class FileSystemCrashStorage(FileSystemThrottledCrashStorage):
    """This storage class is the only file system based crash storage system
    appropriate for storing both raw and processed crashes.  This class uses
    the same segregating raw crash storage as its parent class and adds
    processed storage.  Processed crashes are stored in their own file system
    root, 'pro_fs_root' (processed file system root) using the same radix
    directory system as the raw crashes."""

    required_config = Namespace()
    required_config.add_option(
        'pro_fs_root',
        doc='a path to a local file system for processed storage',
        default='./processedCrashStore',
        reference_value_from='resource.filesystem',
    )
    required_config.add_option(
        'minutes_per_slot',
        doc='the number of minutes in the lowest date directory',
        default=1,
        reference_value_from='resource.filesystem',
    )
    required_config.add_option(
        'sub_slot_count',
        doc='distribute data evenly among this many sub timeslots',
        default=1,
        reference_value_from='resource.filesystem',
    )
    required_config.add_option(
        'index_name',
        doc='the relative path to the top of the name storage tree from '
        'the root parameter',
        default='name',
        reference_value_from='resource.filesystem',
    )
    required_config.add_option(
        'date_name',
        doc='the relative path to the top of the date storage tree from '
        'the root parameter',
        default='date',
        reference_value_from='resource.filesystem',
    )
    required_config.add_option(
        'processed_crash_file_suffix',
        doc='the processed crash filename suffix',
        default='.jsonz',
        reference_value_from='resource.filesystem',
    )
    required_config.add_option(
        'gzip_compression_level',
        doc='the level of compression to use',
        default=9,
        reference_value_from='resource.filesystem',
    )
    required_config.add_option(
        'storage_depth',
        doc='the length of branches in the radix storage tree',
        default=2,
        reference_value_from='resource.filesystem',
    )

    #--------------------------------------------------------------------------
    def __init__(self, config, quit_check_callback=None):
        super(FileSystemCrashStorage, self).__init__(config)
        self.pro_crash_store = ProcessedDumpStorage(
            root=config.pro_fs_root,
            minutesPerSlot=config.minutes_per_slot,
            subSlotCount=config.sub_slot_count,
            indexName=config.index_name,
            dateName=config.date_name,
            fileSuffix=config.processed_crash_file_suffix,
            gzipCompression=config.gzip_compression_level,
            storageDepth=config.storage_depth,
            dumpGID=config.dump_gid,
            dumpPermissions=config.dump_permissions,
            dirPermissions=config.dir_permissions,
        )

    #--------------------------------------------------------------------------
    def save_processed(self, processed_crash):
        """save a processed crash (in the form of a Mapping) into a json
        file.  It first gets the underlying file system to give it a file
        handle open for writing, then it uses the 'json' module to write
        the mapping to the open file handle."""
        try:
            crash_id = processed_crash['uuid']
        except KeyError:
            raise CrashIDNotFound("uuid missing from processed_crash")
        try:
            self._stringify_dates_in_dict(processed_crash)
            processed_crash_file_handle = \
                self.pro_crash_store.newEntry(crash_id)
            try:
                json.dump(processed_crash, processed_crash_file_handle)
            finally:
                processed_crash_file_handle.close()
            self.logger.debug('saved processed- %s', crash_id)
        except Exception:
            self.logger.critical(
                'processed file system storage has failed for: %s',
                crash_id,
                exc_info=True)
            raise

    #--------------------------------------------------------------------------
    def get_unredacted_processed(self, crash_id):
        """fetch a processed json file from the underlying file system"""
        try:
            return self.pro_crash_store.getDumpFromFile(crash_id)
        except OSError:
            raise CrashIDNotFound(crash_id)

    #--------------------------------------------------------------------------
    def remove(self, crash_id):
        """remove the all traces of a crash, both raw and processed from the
        file system."""
        try:
            super(FileSystemCrashStorage, self).remove(crash_id)
        except CrashIDNotFound:
            self.logger.warning('raw crash not found for deletion: %s',
                                crash_id)
        try:
            self.pro_crash_store.removeDumpFile(crash_id)
        except OSError:
            self.logger.warning('processed crash not found for deletion: %s',
                                crash_id)

    #--------------------------------------------------------------------------
    @staticmethod
    def _stringify_dates_in_dict(a_dict):
        for name, value in a_dict.iteritems():
            if isinstance(value, datetime.datetime):
                a_dict[name] = (
                    "%4d-%02d-%02d %02d:%02d:%02d.%d" %
                    (value.year, value.month, value.day, value.hour,
                     value.minute, value.second, value.microsecond))
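_stringify_dates_in_dict rewrites datetime values in place so the processed crash can be serialized by the plain json module in save_processed. A small, self-contained illustration of the same formatting (written with Python 3's dict.items() instead of the iteritems() used above; the crash id is made up):

import datetime
import json


def stringify_dates_in_dict(a_dict):
    # same format string as FileSystemCrashStorage._stringify_dates_in_dict
    for name, value in a_dict.items():
        if isinstance(value, datetime.datetime):
            a_dict[name] = (
                "%4d-%02d-%02d %02d:%02d:%02d.%d" %
                (value.year, value.month, value.day, value.hour,
                 value.minute, value.second, value.microsecond))


crash = {
    'uuid': 'hypothetical-crash-id',
    'date_processed': datetime.datetime(2014, 3, 7, 12, 30, 5, 123456),
}
stringify_dates_in_dict(crash)
print(json.dumps(crash))
# {"uuid": "hypothetical-crash-id", "date_processed": "2014-03-07 12:30:05.123456"}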