Example 1
def pool_from_config(app_config,
                     prefix="memcache.",
                     serializer=None,
                     deserializer=None):
    """Make a PooledClient from a configuration dictionary.

    The keys useful to :py:func:`pool_from_config` should be prefixed, e.g.
    ``memcache.endpoint``, ``memcache.max_pool_size``, etc. The ``prefix``
    argument specifies the prefix used to filter keys. Each key is mapped to a
    corresponding keyword argument on the
    :py:class:`~pymemcache.client.base.PooledClient` constructor.

    Supported keys:

    * ``endpoint`` (required): a string representing a host and port to connect
        to memcached service, e.g. ``localhost:11211`` or ``127.0.0.1:11211``.
    * ``max_pool_size``: an integer for the maximum pool size to use, by default
        this is ``2147483648``.
    * ``connect_timeout``: a float representing seconds to wait for a connection to
        memcached server. Defaults to the underlying socket default timeout.
    * ``timeout``: a float representing seconds to wait for calls on the
        socket connected to memcache. Defaults to the underlying socket default
        timeout.
    * ``no_delay``: a boolean for whether to set the ``TCP_NODELAY`` flag on
        connections. Defaults to ``true``.

    :param dict app_config: the config dictionary
    :param str prefix: prefix for config keys
    :param callable serializer: function to serialize values to strings suitable
        for being stored in memcached. An example is
        :py:func:`~baseplate.context.memcache.lib.make_dump_and_compress_fn`.
    :param callable deserializer: function to convert strings returned from
        memcached to arbitrary objects, must be compatible with ``serializer``.
        An example is :py:func:`~baseplate.context.memcache.lib.decompress_and_load`.

    :returns: :py:class:`pymemcache.client.base.PooledClient`

    """
    assert prefix.endswith(".")
    parser = config.SpecParser({
        "endpoint": config.Endpoint,
        "max_pool_size": config.Optional(config.Integer, default=None),
        "connect_timeout": config.Optional(config.Float, default=None),
        "timeout": config.Optional(config.Float, default=None),
        "no_delay": config.Optional(config.Boolean, default=True),
    })
    options = parser.parse(prefix[:-1], app_config)

    return PooledClient(
        server=options.endpoint.address,
        connect_timeout=options.connect_timeout,
        timeout=options.timeout,
        serializer=serializer,
        deserializer=deserializer,
        no_delay=options.no_delay,
        max_pool_size=options.max_pool_size,
    )
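
A minimal usage sketch: raw config values arrive as strings (as parsed from an INI file), and the endpoint below is illustrative.

app_config = {
    "memcache.endpoint": "localhost:11211",
    "memcache.max_pool_size": "25",
    "memcache.timeout": "0.5",
}
memcache = pool_from_config(app_config)
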
Example 2
def make_server(server_config, listener, app):
    """Make a gevent server for WSGI apps."""
    # pylint: disable=maybe-no-member
    cfg = config.parse_config(
        server_config,
        {
            "handler": config.Optional(config.String, default=None),
            "max_concurrency": config.Integer,
            "stop_timeout": config.Optional(config.Integer, default=0),
        },
    )

    pool = Pool(size=cfg.max_concurrency)
    log = LoggingLogAdapter(logger, level=logging.DEBUG)

    kwargs = {}
    if gevent.version_info[:2] >= (1, 1):  # error_log is new in 1.1
        kwargs["error_log"] = LoggingLogAdapter(logger, level=logging.ERROR)

    if cfg.handler:
        kwargs["handler_class"] = _load_factory(cfg.handler, default_name=None)

    server = WSGIServer(listener, application=app, spawn=pool, log=log, **kwargs)
    server.stop_timeout = cfg.stop_timeout

    runtime_monitor.start(server_config, app, pool)
    return server
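
A sketch of driving this factory, assuming ``listener`` is an already-bound socket and ``wsgi_app`` is the WSGI callable; the key names follow the spec above.

server_config = {
    "max_concurrency": "100",
    "stop_timeout": "10",
}
server = make_server(server_config, listener, wsgi_app)
server.serve_forever()
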
Example 3
def publish_traces():
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("config_file", type=argparse.FileType("r"),
        help="path to a configuration file")
    arg_parser.add_argument("--queue-name", default="main",
        help="name of trace queue / publisher config (default: main)")
    arg_parser.add_argument("--debug", default=False, action="store_true",
        help="enable debug logging")
    arg_parser.add_argument("--app-name", default="main", metavar="NAME",
        help="name of app to load from config_file (default: main)")
    args = arg_parser.parse_args()

    if args.debug:
        level = logging.DEBUG
    else:
        level = logging.WARNING
    logging.basicConfig(level=level)

    config_parser = configparser.RawConfigParser()
    config_parser.readfp(args.config_file)

    publisher_raw_cfg = dict(config_parser.items("trace-publisher:" + args.queue_name))
    publisher_cfg = config.parse_config(publisher_raw_cfg, {
        "zipkin_api_url": config.Endpoint,
        "post_timeout": config.Optional(config.Integer, POST_TIMEOUT_DEFAULT),
        "max_batch_size": config.Optional(config.Integer, MAX_BATCH_SIZE_DEFAULT),
        "retry_limit": config.Optional(config.Integer, RETRY_LIMIT_DEFAULT),
    })

    trace_queue = MessageQueue(
        "/traces-" + args.queue_name,
        max_messages=MAX_QUEUE_SIZE,
        max_message_size=MAX_SPAN_SIZE,
    )

    # pylint: disable=maybe-no-member
    inner_batch = TraceBatch(max_size=publisher_cfg.max_batch_size)
    batcher = TimeLimitedBatch(inner_batch, MAX_BATCH_AGE)
    metrics_client = metrics_client_from_config(publisher_raw_cfg)
    publisher = ZipkinPublisher(
        publisher_cfg.zipkin_api_url.address,
        metrics_client,
        post_timeout=publisher_cfg.post_timeout,
    )

    while True:
        try:
            message = trace_queue.get(timeout=.2)
        except TimedOutError:
            message = None

        try:
            batcher.add(message)
        except BatchFull:
            serialized = batcher.serialize()
            publisher.publish(serialized)
            batcher.reset()
            batcher.add(message)
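
For context, the configuration file consumed by this entry point might contain a section like the following sketch. The values are illustrative; note that ``metrics_client_from_config`` is fed the same section, so it also expects the ``metrics.*`` keys (see Example 15).

[trace-publisher:main]
zipkin_api_url = localhost:9411
post_timeout = 3
metrics.namespace = trace_publisher
metrics.endpoint = localhost:8125
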
Example 4
def thrift_pool_from_config(app_config, prefix, **kwargs):
    """Make a ThriftConnectionPool from a configuration dictionary.

    The keys useful to :py:func:`thrift_pool_from_config` should be prefixed,
    e.g.  ``example_service.endpoint`` etc. The ``prefix`` argument specifies
    the prefix used to filter keys.  Each key is mapped to a corresponding
    keyword argument on the :py:class:`ThriftConnectionPool` constructor.  Any
    keyword arguments given to this function will also be passed through to
    the constructor. Keyword arguments take precedence over the configuration
    file.

    Supported keys:

    * ``endpoint`` (required): A ``host:port`` pair, e.g. ``localhost:2014``,
        where the Thrift server can be found.
    * ``size``: The size of the connection pool.
    * ``max_age``: The oldest a connection can be before it's recycled and
        replaced with a new one. Written as a time span e.g. ``1 minute``.
    * ``timeout``: The maximum amount of time a connection attempt or RPC call
        can take before a TimeoutError is raised.
    * ``max_retries``: The maximum number of times the pool will attempt to
        open a connection.

    """
    assert prefix.endswith(".")
    config_prefix = prefix[:-1]

    cfg = config.parse_config(
        app_config,
        {
            config_prefix: {
                "endpoint": config.Endpoint,
                "size": config.Optional(config.Integer, default=10),
                "max_age": config.Optional(
                    config.Timespan, default=config.Timespan("1 minute")),
                "timeout": config.Optional(
                    config.Timespan, default=config.Timespan("1 second")),
                "max_retries": config.Optional(config.Integer, default=3),
            }
        },
    )
    options = getattr(cfg, config_prefix)

    if options.size is not None:
        kwargs.setdefault("size", options.size)
    if options.max_age is not None:
        kwargs.setdefault("max_age", options.max_age.total_seconds())
    if options.timeout is not None:
        kwargs.setdefault("timeout", options.timeout.total_seconds())
    if options.max_retries is not None:
        kwargs.setdefault("max_retries", options.max_retries)

    return ThriftConnectionPool(endpoint=options.endpoint, **kwargs)
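
A usage sketch with illustrative values; ``config.Timespan`` accepts human-readable spans like the one below.

app_config = {
    "example_service.endpoint": "localhost:9090",
    "example_service.size": "5",
    "example_service.timeout": "500 milliseconds",
}
pool = thrift_pool_from_config(app_config, prefix="example_service.")
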
Example 5
def cluster_from_config(
    app_config: config.RawConfig,
    secrets: Optional[SecretsStore] = None,
    prefix: str = "cassandra.",
    execution_profiles: Optional[Dict[str, ExecutionProfile]] = None,
    **kwargs: Any,
):
    """Make a Cluster from a configuration dictionary.

    The keys useful to :py:func:`cluster_from_config` should be prefixed, e.g.
    ``cassandra.contact_points`` etc. The ``prefix`` argument specifies the
    prefix used to filter keys.  Each key is mapped to a corresponding keyword
    argument on the :py:class:`~cassandra.cluster.Cluster` constructor.  Any
    keyword arguments given to this function will be passed through to the
    :py:class:`~cassandra.cluster.Cluster` constructor. Keyword arguments take
    precedence over the configuration file.

    Supported keys:

    * ``contact_points`` (required): a comma-delimited list of contact points
      to try when connecting for cluster discovery
    * ``port``: The server-side port to open connections to.
    * ``credentials_secret`` (optional): the key used to retrieve the database
        credentials from ``secrets`` as a :py:class:`~baseplate.secrets.CredentialSecret`.

    :param execution_profiles: Configured execution profiles to provide to the
        rest of the application.

    """
    assert prefix.endswith(".")
    parser = config.SpecParser({
        "contact_points": config.TupleOf(config.String),
        "port": config.Optional(config.Integer, default=None),
        "credentials_secret": config.Optional(config.String),
    })
    options = parser.parse(prefix[:-1], app_config)

    if options.port:
        kwargs.setdefault("port", options.port)

    if options.credentials_secret:
        if not secrets:
            raise TypeError(
                "'secrets' is required if 'credentials_secret' is set")
        credentials = secrets.get_credentials(options.credentials_secret)
        kwargs.setdefault(
            "auth_provider",
            PlainTextAuthProvider(username=credentials.username,
                                  password=credentials.password),
        )

    return Cluster(options.contact_points,
                   execution_profiles=execution_profiles,
                   **kwargs)
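
A usage sketch, assuming ``my_secrets_store`` (a hypothetical name) is an already-configured SecretsStore holding the referenced credential secret.

app_config = {
    "cassandra.contact_points": "10.0.0.1, 10.0.0.2",
    "cassandra.credentials_secret": "secret/myservice/cassandra",  # illustrative secret path
}
cluster = cluster_from_config(app_config, secrets=my_secrets_store)
session = cluster.connect()
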
Example 6
def start(server_config, application, pool):
    if (not hasattr(application, "baseplate")
            or not application.baseplate._metrics_client):
        logging.info(
            "No metrics client configured. Server metrics will not be sent.")
        return

    cfg = config.parse_config(
        server_config,
        {
            "monitoring": {
                "blocked_hub": config.Optional(config.Timespan, default=None),
                "concurrency": config.Optional(config.Boolean, default=True),
                "gc": {
                    "stats": config.Optional(config.Boolean, default=True),
                    "timing": config.Optional(config.Boolean, default=False),
                },
            }
        },
    )

    reporters = []

    if cfg.monitoring.concurrency:
        reporters.append(_ConcurrencyReporter(pool))

    if cfg.monitoring.blocked_hub is not None:
        try:
            reporters.append(
                _BlockedGeventHubReporter(
                    cfg.monitoring.blocked_hub.total_seconds()))
        except Exception as exc:
            logging.info("monitoring.blocked_hub disabled: %s", exc)

    if cfg.monitoring.gc.stats:
        try:
            reporters.append(_GCStatsReporter())
        except Exception as exc:
            logging.info("monitoring.gc.stats disabled: %s", exc)

    if cfg.monitoring.gc.timing:
        try:
            reporters.append(_GCTimingReporter())
        except Exception as exc:
            logging.info("monitoring.gc.timing disabled: %s", exc)

    thread = threading.Thread(
        name="Server Monitoring",
        target=_report_runtime_metrics_periodically,
        args=(application.baseplate._metrics_client, reporters),
    )
    thread.daemon = True
    thread.start()
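
For illustration, the monitors above could be enabled with server config entries like these; ``application`` must be a Baseplate application with a metrics client configured, or the function returns early as shown.

server_config = {
    "monitoring.blocked_hub": "100 milliseconds",
    "monitoring.concurrency": "true",
    "monitoring.gc.stats": "true",
    "monitoring.gc.timing": "false",
}
start(server_config, application, pool)
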
Example 7
def main():
    arg_parser = argparse.ArgumentParser(
        description=sys.modules[__name__].__doc__)
    arg_parser.add_argument("config_file",
                            type=argparse.FileType("r"),
                            help="path to a configuration file")
    arg_parser.add_argument("--debug",
                            default=False,
                            action="store_true",
                            help="enable debug logging")
    args = arg_parser.parse_args()

    if args.debug:
        level = logging.DEBUG
    else:
        level = logging.INFO
    logging.basicConfig(level=level, format="%(message)s")

    # quiet kazoo's verbose logs a bit
    logging.getLogger("kazoo").setLevel(logging.WARNING)

    parser = configparser.RawConfigParser()
    parser.readfp(args.config_file)  # pylint: disable=deprecated-method
    watcher_config = dict(parser.items("live-data"))

    cfg = config.parse_config(
        watcher_config,
        {
            "nodes":
            config.DictOf(
                {
                    "source": config.String,
                    "dest": config.String,
                    "owner": config.Optional(config.UnixUser),
                    "group": config.Optional(config.UnixGroup),
                    "mode": config.Optional(config.Integer(base=8),
                                            default=0o400),
                })
        },
    )
    # pylint: disable=maybe-no-member
    nodes = cfg.nodes.values()

    secrets = secrets_store_from_config(watcher_config, timeout=30)
    zookeeper = zookeeper_client_from_config(secrets,
                                             watcher_config,
                                             read_only=True)
    zookeeper.start()
    try:
        watch_zookeeper_nodes(zookeeper, nodes)
    finally:
        zookeeper.stop()
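
A sketch of the ``[live-data]`` section this watcher might read; node names and paths are illustrative, and the ``zookeeper.*`` keys are an assumption about what ``zookeeper_client_from_config`` expects in this codebase.

[live-data]
zookeeper.hosts = zk01.local:2181,zk02.local:2181
nodes.flags.source = /flags
nodes.flags.dest = /var/local/flags.json
nodes.flags.mode = 0400
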
Example 8
def experiments_client_from_config(app_config,
                                   event_logger,
                                   prefix="experiments."):
    """Configure and return an :py:class:`ExperimentsContextFactory` object.

    The keys useful to :py:func:`experiments_client_from_config` should be prefixed, e.g.
    ``experiments.path``, etc.

    Supported keys:

    ``path``: the path to the experiment config file generated by the experiment
        config fetcher daemon.
    ``timeout`` (optional): how long to wait for the file specified by
        ``path`` to exist. Defaults to ``None``, which waits indefinitely.

    :param dict app_config: The application configuration which should have
        settings for the experiments client.
    :param baseplate.events.EventLogger event_logger: The EventLogger to be used
        to log bucketing events.
    :param str prefix: the prefix used to filter keys (defaults to "experiments.").

    :rtype: :py:class:`ExperimentsContextFactory`

    """
    assert prefix.endswith(".")
    config_prefix = prefix[:-1]

    cfg = config.parse_config(
        app_config,
        {
            config_prefix: {
                "path": config.Optional(
                    config.String, default="/var/local/experiments.json"),
                "timeout": config.Optional(config.Timespan),
            }
        },
    )
    options = getattr(cfg, config_prefix)

    # pylint: disable=maybe-no-member
    if options.timeout:
        timeout = options.timeout.total_seconds()
    else:
        timeout = None

    return ExperimentsContextFactory(options.path,
                                     event_logger,
                                     timeout=timeout)
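
A usage sketch, assuming ``my_event_logger`` (hypothetical) implements the expected EventLogger interface.

app_config = {
    "experiments.path": "/var/local/experiments.json",
    "experiments.timeout": "60 seconds",
}
factory = experiments_client_from_config(app_config, my_event_logger)
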
Example 9
def pool_from_config(app_config, prefix="redis.", **kwargs):
    """Make a ConnectionPool from a configuration dictionary.

    The keys useful to :py:func:`pool_from_config` should be prefixed, e.g.
    ``redis.url``, ``redis.max_connections``, etc. The ``prefix`` argument
    specifies the prefix used to filter keys.  Each key is mapped to a
    corresponding keyword argument on the :py:class:`redis.ConnectionPool`
    constructor.

    Supported keys:

    * ``url`` (required): a URL like ``redis://localhost/0``.
    * ``max_connections``: an integer maximum number of connections in the pool
    * ``socket_connect_timeout``: a timespan of how long to wait for sockets
        to connect. e.g. ``200 milliseconds``.
    * ``socket_timeout``: a timespan of how long to wait for socket operations,
        e.g. ``200 milliseconds``.

    """
    assert prefix.endswith(".")
    config_prefix = prefix[:-1]
    cfg = config.parse_config(
        app_config,
        {
            config_prefix: {
                "url": config.String,
                "max_connections": config.Optional(config.Integer, default=None),
                "socket_connect_timeout": config.Optional(
                    config.Timespan, default=None),
                "socket_timeout": config.Optional(config.Timespan, default=None),
            }
        },
    )

    options = getattr(cfg, config_prefix)

    if options.max_connections is not None:
        kwargs.setdefault("max_connections", options.max_connections)
    if options.socket_connect_timeout is not None:
        kwargs.setdefault("socket_connect_timeout",
                          options.socket_connect_timeout.total_seconds())
    if options.socket_timeout is not None:
        kwargs.setdefault("socket_timeout",
                          options.socket_timeout.total_seconds())

    return redis.BlockingConnectionPool.from_url(options.url, **kwargs)
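
A usage sketch; the resulting pool can be handed to a redis-py client as shown (values are illustrative).

app_config = {
    "redis.url": "redis://localhost:6379/0",
    "redis.max_connections": "50",
    "redis.socket_timeout": "200 milliseconds",
}
pool = pool_from_config(app_config)
client = redis.StrictRedis(connection_pool=pool)
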
Example 10
def hvac_factory_from_config(app_config, secrets_store, prefix="vault."):
    """Make an HVAC client factory from a configuration dictionary.

    The keys useful to :py:func:`hvac_factory_from_config` should be prefixed,
    e.g.  ``vault.timeout``. The ``prefix`` argument specifies the prefix used
    to filter keys.

    Supported keys:

    * ``timeout``: How long to wait for calls to Vault.

    :param dict app_config: The raw application configuration.
    :param baseplate.secrets.SecretsStore secrets_store: A configured secrets
        store from which we can get a Vault authentication token.
    :param str prefix: The prefix for configuration keys.

    """
    assert prefix.endswith(".")
    parser = config.SpecParser({
        "timeout": config.Optional(
            config.Timespan, default=datetime.timedelta(seconds=1)),
    })
    options = parser.parse(prefix[:-1], app_config)

    return HvacContextFactory(secrets_store, options.timeout)
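
A brief sketch, assuming ``my_secrets_store`` (hypothetical) is an already-configured SecretsStore.

app_config = {"vault.timeout": "2 seconds"}
hvac_factory = hvac_factory_from_config(app_config, my_secrets_store)
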
Example 11
def make_processor(app_config):  # pragma: nocover
    cfg = config.parse_config(app_config, {
        "activity": {
            "window": config.Timespan,
            "fuzz_threshold": config.Integer,
        },

        "redis": {
            "url": config.String,
            "max_connections": config.Optional(config.Integer, default=100),
        },
    })

    metrics_client = make_metrics_client(app_config)
    redis_pool = redis.BlockingConnectionPool.from_url(
        cfg.redis.url,
        max_connections=cfg.redis.max_connections,
        timeout=0.1,
    )

    baseplate = Baseplate()
    baseplate.configure_logging()
    baseplate.configure_metrics(metrics_client)
    baseplate.add_to_context("redis", RedisContextFactory(redis_pool))

    counter = ActivityCounter(cfg.activity.window.total_seconds())
    handler = Handler(
        fuzz_threshold=cfg.activity.fuzz_threshold,
        counter=counter,
    )
    processor = ActivityService.ContextProcessor(handler)
    event_handler = BaseplateProcessorEventHandler(logger, baseplate)
    processor.setEventHandler(event_handler)

    return processor
Example 12
def secrets_store_from_config(app_config, timeout=None):
    """Configure and return a secrets store.

    This expects one configuration option:

    ``secrets.path``
        The path to the secrets file generated by the secrets fetcher daemon.

    :param dict app_config: The application configuration which should have
        settings for the secrets store.
    :param float timeout: How long, in seconds, to block instantiation waiting
        for the secrets data to become available (defaults to not blocking).
    :rtype: :py:class:`SecretsStore`

    """
    cfg = config.parse_config(
        app_config, {
            "secrets": {
                "path":
                config.Optional(config.String,
                                default="/var/local/secrets.json"),
            },
        })
    # pylint: disable=maybe-no-member
    return SecretsStore(cfg.secrets.path, timeout=timeout)
Example 13
def exchange_from_config(app_config, prefix, **kwargs):
    """Make an Exchange from a configuration dictionary.

    The keys useful to :py:func:`exchange_from_config` should be prefixed,
    e.g. ``amqp.exchange_name`` etc. The ``prefix`` argument specifies the
    prefix used to filter keys.  Each key is mapped to a corresponding keyword
    argument on the :py:class:`~kombu.Exchange` constructor.  Any keyword
    arguments given to this function will be passed through to the
    :py:class:`~kombu.Exchange` constructor. Keyword arguments take precedence
    over the configuration file.

    Supported keys:

    * ``exchange_name``
    * ``exchange_type``

    """
    assert prefix.endswith(".")
    parser = config.SpecParser({
        "exchange_name": config.Optional(config.String),
        "exchange_type": config.String
    })
    options = parser.parse(prefix[:-1], app_config)
    return Exchange(name=options.exchange_name or "",
                    type=options.exchange_type,
                    **kwargs)
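
A usage sketch with illustrative values.

app_config = {
    "amqp.exchange_name": "events",
    "amqp.exchange_type": "topic",
}
exchange = exchange_from_config(app_config, prefix="amqp.")
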
Example 14
def secrets_store_from_config(app_config, timeout=None, prefix="secrets."):
    """Configure and return a secrets store.

    The keys useful to :py:func:`secrets_store_from_config` should be prefixed, e.g.
    ``secrets.url``, etc.

    Supported keys:

    ``path``: the path to the secrets file generated by the secrets fetcher daemon.

    :param dict app_config: The application configuration which should have
        settings for the secrets store.
    :param float timeout: (Optional) How long, in seconds, to block instantiation waiting
        for the secrets data to become available (defaults to not blocking).
    :param str prefix: (Optional) specifies the prefix used to filter keys. Defaults
        to "secrets."
    :rtype: :py:class:`SecretsStore`

    """
    assert prefix.endswith(".")
    config_prefix = prefix[:-1]

    cfg = config.parse_config(
        app_config,
        {
            config_prefix: {
                "path": config.Optional(config.String, default="/var/local/secrets.json")
            }
        },
    )
    options = getattr(cfg, config_prefix)

    # pylint: disable=maybe-no-member
    return SecretsStore(options.path, timeout=timeout)
Example 15
def metrics_client_from_config(raw_config: config.RawConfig) -> metrics.Client:
    """Configure and return a metrics client.

    This expects two configuration options:

    ``metrics.namespace``
        The root key to prefix all metrics in this application with.
    ``metrics.endpoint``
        A ``host:port`` pair, e.g. ``localhost:2014``. If an empty string, a
        client that discards all metrics will be returned.

    :param dict raw_config: The application configuration which should have
        settings for the metrics client.
    :return: A configured client.
    :rtype: :py:class:`baseplate.metrics.Client`

    """
    cfg = config.parse_config(
        raw_config,
        {
            "metrics": {
                "namespace": config.String,
                "endpoint": config.Optional(config.Endpoint)
            }
        },
    )

    # pylint: disable=maybe-no-member
    return metrics.make_client(cfg.metrics.namespace, cfg.metrics.endpoint)
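
A usage sketch; the endpoint is illustrative (an empty string would instead return a client that discards metrics, per the docstring).

app_config = {
    "metrics.namespace": "myservice",
    "metrics.endpoint": "localhost:8125",
}
client = metrics_client_from_config(app_config)
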
Example 16
def connection_from_config(app_config, prefix, **kwargs):
    """Make a Connection from a configuration dictionary.

    The keys useful to :py:func:`connection_from_config` should be prefixed,
    e.g. ``amqp.hostname`` etc. The ``prefix`` argument specifies the
    prefix used to filter keys.  Each key is mapped to a corresponding keyword
    argument on the :py:class:`~kombu.connection.Connection` constructor.  Any
    keyword arguments given to this function will be passed through to the
    :py:class:`~kombu.connection.Connection` constructor. Keyword arguments
    take precedence over the configuration file.

    Supported keys:

    * ``hostname``
    * ``virtual_host``

    """
    assert prefix.endswith(".")
    parser = config.SpecParser({
        "hostname": config.String,
        "virtual_host": config.Optional(config.String)
    })
    options = parser.parse(prefix[:-1], app_config)
    return Connection(hostname=options.hostname,
                      virtual_host=options.virtual_host,
                      **kwargs)
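
A usage sketch mirroring the exchange example above; the hostname is illustrative.

app_config = {
    "amqp.hostname": "rabbit.local",
    "amqp.virtual_host": "/",
}
connection = connection_from_config(app_config, prefix="amqp.")
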
Example 17
def make_processor(app_config):  # pragma: nocover
    cfg = config.parse_config(app_config, {
        "activity": {
            "window": config.Timespan,
        },
        "tracing": {
            "endpoint": config.Optional(config.Endpoint),
            "service_name": config.String,
        },
        "redis": {
            "url": config.String,
            "max_connections": config.Optional(config.Integer, default=100),
        },
    })

    metrics_client = metrics_client_from_config(app_config)
    tracing_client = tracing_client_from_config(app_config)
    error_reporter = error_reporter_from_config(app_config, __name__)
    redis_pool = redis.BlockingConnectionPool.from_url(
        cfg.redis.url,
        max_connections=cfg.redis.max_connections,
        timeout=0.1,
    )

    baseplate = Baseplate()
    baseplate.configure_logging()
    baseplate.configure_metrics(metrics_client)
    baseplate.configure_tracing(tracing_client)
    baseplate.configure_error_reporting(error_reporter)
    baseplate.add_to_context("redis", RedisContextFactory(redis_pool))

    counter = ActivityCounter(cfg.activity.window.total_seconds())
    handler = Handler(counter=counter)
    processor = ActivityService.ContextProcessor(handler)
    event_handler = BaseplateProcessorEventHandler(logger, baseplate)
    processor.setEventHandler(event_handler)

    return processor
Example 18
def make_server(server_config, listener, app):
    # pylint: disable=maybe-no-member
    cfg = config.parse_config(
        server_config, {
            "max_concurrency": config.Integer,
            "stop_timeout": config.Optional(config.Integer, default=0),
        })

    pool = Pool(size=cfg.max_concurrency)
    server = GeventServer(
        processor=app,
        listener=listener,
        spawn=pool,
    )
    server.stop_timeout = cfg.stop_timeout
    return server
Example 19
def make_wsgi_app(app_config):
    cfg = config.parse_config(
        app_config, {
            "activity": {
                "endpoint": config.Endpoint,
            },
            "tracing": {
                "endpoint": config.Optional(config.Endpoint),
                "service_name": config.String,
            },
        })

    metrics_client = make_metrics_client(app_config)

    pool = ThriftConnectionPool(cfg.activity.endpoint)

    baseplate = Baseplate()
    baseplate.configure_logging()
    baseplate.configure_metrics(metrics_client)
    baseplate.configure_tracing(
        cfg.tracing.service_name,
        cfg.tracing.endpoint,
    )

    baseplate.add_to_context(
        "activity", ThriftContextFactory(pool, ActivityService.Client))

    configurator = Configurator(settings=app_config)

    baseplate_configurator = BaseplateConfigurator(baseplate)
    configurator.include(baseplate_configurator.includeme)

    controller = ActivityGateway()
    configurator.add_route("health", "/health", request_method="GET")
    configurator.add_view(controller.is_healthy,
                          route_name="health",
                          renderer="json")

    configurator.add_route("pixel",
                           "/{context_id:[A-Za-z0-9_]{,40}}.png",
                           request_method="GET")
    configurator.add_view(controller.pixel, route_name="pixel")

    return configurator.make_wsgi_app()
Example 20
def engine_from_config(app_config, secrets=None, prefix="database."):
    """Make an :py:class:`~sqlalchemy.engine.Engine` from a configuration dictionary.

    The keys useful to :py:func:`engine_from_config` should be prefixed, e.g.
    ``database.url``, etc. The ``prefix`` argument specifies the prefix used to
    filter keys.

    Supported keys:

    * ``url``: the connection URL to the database, passed to
        :py:func:`~sqlalchemy.engine.url.make_url` to create the
        :py:class:`~sqlalchemy.engine.url.URL` used to connect to the database.
    * ``credentials_secret`` (optional): the key used to retrieve the database
        credentials from ``secrets`` as a :py:class:`~baseplate.secrets.CredentialSecret`.
        If this is supplied, any credentials given in ``url`` will be replaced
        by these.

    """
    assert prefix.endswith(".")
    config_prefix = prefix[:-1]
    cfg = config.parse_config(
        app_config,
        {
            config_prefix: {
                "url": config.String,
                "credentials_secret": config.Optional(config.String),
            }
        },
    )
    options = getattr(cfg, config_prefix)
    url = make_url(options.url)

    if options.credentials_secret:
        if not secrets:
            raise TypeError(
                "'secrets' is a required argument to 'engine_from_config' "
                "if 'credentials_secret' is set"
            )
        credentials = secrets.get_credentials(options.credentials_secret)
        url.username = credentials.username
        url.password = credentials.password

    return create_engine(url)
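
A usage sketch, assuming ``my_secrets_store`` (hypothetical) holds the referenced credential secret; without ``credentials_secret`` the ``secrets`` argument can be omitted.

app_config = {
    "database.url": "postgresql://db.local/mydb",
    "database.credentials_secret": "secret/myservice/db",  # illustrative secret path
}
engine = engine_from_config(app_config, secrets=my_secrets_store)
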
Example 21
    def test_simple_config(self):
        result = config.parse_config(
            self.config, {
                "simple": config.String,
                "foo": {
                    "bar": config.Integer,
                },
                "noo": {
                    "bar": config.Optional(config.String, default=""),
                },
                "deep": {
                    "so": {
                        "deep": config.String,
                    },
                },
            })

        self.assertEqual(result.simple, "oink")
        self.assertEqual(result.foo.bar, 33)
        self.assertEqual(result.noo.bar, "")
        self.assertEqual(result.deep.so.deep, "very")
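
The fixture is not shown; a flat dictionary like this sketch would satisfy all four assertions (values come straight from the asserts above).

self.config = {
    "simple": "oink",
    "foo.bar": "33",
    "deep.so.deep": "very",
    # "noo.bar" is omitted, so the Optional falls back to its default of ""
}
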
Example 22
def cluster_from_config(app_config, prefix="cassandra.", **kwargs):
    """Make a Cluster from a configuration dictionary.

    The keys useful to :py:func:`cluster_from_config` should be prefixed, e.g.
    ``cassandra.contact_points`` etc. The ``prefix`` argument specifies the
    prefix used to filter keys.  Each key is mapped to a corresponding keyword
    argument on the :py:class:`~cassandra.cluster.Cluster` constructor.  Any
    keyword arguments given to this function will be passed through to the
    :py:class:`~cassandra.cluster.Cluster` constructor. Keyword arguments take
    precedence over the configuration file.

    Supported keys:

    * ``contact_points`` (required): a comma-delimited list of contact points
      to try when connecting for cluster discovery
    * ``port``: The server-side port to open connections to.

    """
    assert prefix.endswith(".")
    config_prefix = prefix[:-1]
    cfg = config.parse_config(
        app_config,
        {
            config_prefix: {
                "contact_points": config.TupleOf(config.String),
                "port": config.Optional(config.Integer, default=None),
            }
        },
    )

    options = getattr(cfg, config_prefix)

    if options.port:
        kwargs.setdefault("port", options.port)

    return Cluster(options.contact_points, **kwargs)
Example 23
class Globals(object):
    spec = {

        ConfigValue.int: [
            'db_pool_size',
            'db_pool_overflow_size',
            'commentpane_cache_time',
            'num_mc_clients',
            'MAX_CAMPAIGNS_PER_LINK',
            'MIN_DOWN_LINK',
            'MIN_UP_KARMA',
            'MIN_DOWN_KARMA',
            'MIN_RATE_LIMIT_KARMA',
            'MIN_RATE_LIMIT_COMMENT_KARMA',
            'HOT_PAGE_AGE',
            'ADMIN_COOKIE_TTL',
            'ADMIN_COOKIE_MAX_IDLE',
            'OTP_COOKIE_TTL',
            'hsts_max_age',
            'num_comments',
            'max_comments',
            'max_comments_gold',
            'max_comment_parent_walk',
            'max_sr_images',
            'num_serendipity',
            'comment_visits_period',
            'butler_max_mentions',
            'min_membership_create_community',
            'bcrypt_work_factor',
            'cassandra_pool_size',
            'sr_banned_quota',
            'sr_muted_quota',
            'sr_wikibanned_quota',
            'sr_wikicontributor_quota',
            'sr_moderator_invite_quota',
            'sr_contributor_quota',
            'sr_quota_time',
            'sr_invite_limit',
            'thumbnail_hidpi_scaling',
            'wiki_keep_recent_days',
            'wiki_max_page_length_bytes',
            'wiki_max_page_name_length',
            'wiki_max_page_separators',
            'RL_RESET_MINUTES',
            'RL_OAUTH_RESET_MINUTES',
            'comment_karma_display_floor',
            'link_karma_display_floor',
            'mobile_auth_gild_time',
            'default_total_budget_pennies',
            'min_total_budget_pennies',
            'max_total_budget_pennies',
            'default_bid_pennies',
            'min_bid_pennies',
            'max_bid_pennies',
            'frequency_cap_min',
            'frequency_cap_default',
            'eu_cookie_max_attempts',
        ],

        ConfigValue.float: [
            'statsd_sample_rate',
            'querycache_prune_chance',
            'RL_AVG_REQ_PER_SEC',
            'RL_OAUTH_AVG_REQ_PER_SEC',
            'RL_LOGIN_AVG_PER_SEC',
            'RL_LOGIN_IP_AVG_PER_SEC',
            'RL_SHARE_AVG_PER_SEC',
            'tracing_sample_rate',
        ],

        ConfigValue.bool: [
            'debug',
            'log_start',
            'sqlprinting',
            'template_debug',
            'reload_templates',
            'uncompressedJS',
            'css_killswitch',
            'db_create_tables',
            'disallow_db_writes',
            'disable_ratelimit',
            'amqp_logging',
            'read_only_mode',
            'disable_wiki',
            'heavy_load_mode',
            'disable_captcha',
            'disable_ads',
            'disable_require_admin_otp',
            'trust_local_proxies',
            'shard_commentstree_queues',
            'shard_author_query_queues',
            'shard_subreddit_query_queues',
            'shard_domain_query_queues',
            'authnet_validate',
            'ENFORCE_RATELIMIT',
            'RL_SITEWIDE_ENABLED',
            'RL_OAUTH_SITEWIDE_ENABLED',
            'enable_loggedout_experiments',
        ],

        ConfigValue.tuple: [
            'plugins',
            'stalecaches',
            'lockcaches',
            'permacache_memcaches',
            'cassandra_seeds',
            'automatic_reddits',
            'hardcache_categories',
            'case_sensitive_domains',
            'known_image_domains',
            'reserved_subdomains',
            'offsite_subdomains',
            'TRAFFIC_LOG_HOSTS',
            'exempt_login_user_agents',
            'autoexpand_media_types',
            'media_preview_domain_whitelist',
            'multi_icons',
            'hide_subscribers_srs',
            'mcrouter_addr',
        ],

        ConfigValue.tuple_of(ConfigValue.int): [
            'thumbnail_size',
            'preview_image_max_size',
            'preview_image_min_size',
            'mobile_ad_image_size',
        ],

        ConfigValue.tuple_of(ConfigValue.float): [
            'ios_versions',
            'android_versions',
        ],

        ConfigValue.dict(ConfigValue.str, ConfigValue.int): [
            'user_agent_ratelimit_regexes',
        ],

        ConfigValue.str: [
            'wiki_page_registration_info',
            'wiki_page_privacy_policy',
            'wiki_page_user_agreement',
            'wiki_page_gold_bottlecaps',
            'fraud_email',
            'feedback_email',
            'share_reply',
            'community_email',
            'smtp_server',
            'events_collector_url',
            'events_collector_test_url',
            'search_provider',
        ],

        ConfigValue.choice(ONE=CL_ONE, QUORUM=CL_QUORUM): [
             'cassandra_rcl',
             'cassandra_wcl',
        ],

        ConfigValue.choice(zookeeper="zookeeper", config="config"): [
            "liveconfig_source",
            "secrets_source",
        ],

        ConfigValue.timeinterval: [
            'ARCHIVE_AGE',
            "vote_queue_grace_period",
        ],

        config_gold_price: [
            'gold_month_price',
            'gold_year_price',
            'cpm_selfserve',
            'cpm_selfserve_geotarget_metro',
            'cpm_selfserve_geotarget_country',
            'cpm_selfserve_collection',
        ],

        ConfigValue.baseplate(baseplate_config.Optional(baseplate_config.Endpoint)): [
            "activity_endpoint",
            "tracing_endpoint",
        ],

        ConfigValue.dict(ConfigValue.str, ConfigValue.str): [
            'emr_traffic_tags',
        ],
    }

    live_config_spec = {
        ConfigValue.bool: [
            'frontend_logging',
            'mobile_gild_first_login',
            'precomputed_comment_suggested_sort',
        ],
        ConfigValue.int: [
            'captcha_exempt_comment_karma',
            'captcha_exempt_link_karma',
            'create_sr_account_age_days',
            'create_sr_comment_karma',
            'create_sr_link_karma',
            'cflag_min_votes',
            'ads_popularity_threshold',
            'precomputed_comment_sort_min_comments',
            'comment_vote_update_threshold',
            'comment_vote_update_period',
        ],
        ConfigValue.float: [
            'cflag_lower_bound',
            'cflag_upper_bound',
            'spotlight_interest_sub_p',
            'spotlight_interest_nosub_p',
            'gold_revenue_goal',
            'invalid_key_sample_rate',
            'events_collector_vote_sample_rate',
            'events_collector_poison_sample_rate',
            'events_collector_mod_sample_rate',
            'events_collector_quarantine_sample_rate',
            'events_collector_modmail_sample_rate',
            'events_collector_report_sample_rate',
            'events_collector_submit_sample_rate',
            'events_collector_comment_sample_rate',
            'events_collector_use_gzip_chance',
            'https_cert_testing_probability',
        ],
        ConfigValue.tuple: [
            'fastlane_links',
            'listing_chooser_sample_multis',
            'discovery_srs',
            'proxy_gilding_accounts',
            'mweb_blacklist_expressions',
            'global_loid_experiments',
            'precomputed_comment_sorts',
            'mailgun_domains',
        ],
        ConfigValue.str: [
            'listing_chooser_gold_multi',
            'listing_chooser_explore_sr',
        ],
        ConfigValue.messages: [
            'welcomebar_messages',
            'sidebar_message',
            'gold_sidebar_message',
        ],
        ConfigValue.dict(ConfigValue.str, ConfigValue.int): [
            'ticket_groups',
            'ticket_user_fields', 
        ],
        ConfigValue.dict(ConfigValue.str, ConfigValue.float): [
            'pennies_per_server_second',
        ],
        ConfigValue.dict(ConfigValue.str, ConfigValue.str): [
            'employee_approved_clients',
            'modmail_forwarding_email',
            'modmail_account_map',
        ],
        ConfigValue.dict(ConfigValue.str, ConfigValue.choice(**PERMISSIONS)): [
            'employees',
        ],
    }

    def __init__(self, config, global_conf, app_conf, paths, **extra):
        """
        Globals acts as a container for objects available throughout
        the life of the application.

        One instance of Globals is created by Pylons during
        application initialization and is available during requests
        via the 'g' variable.

        ``config``
            The PylonsConfig object passed in from ``config/environment.py``

        ``global_conf``
            The same variable used throughout ``config/middleware.py``
            namely, the variables from the ``[DEFAULT]`` section of the
            configuration file.

        ``app_conf``
            The same ``kw`` dictionary used throughout
            ``config/middleware.py`` namely, the variables from the
            section in the config file for your application.

        ``extra``
            The configuration returned from ``load_config`` in 
            ``config/middleware.py`` which may be of use in the setup of
            your global variables.

        """

        global_conf.setdefault("debug", False)

        # reloading site ensures that we have a fresh sys.path to build our
        # working set off of. this means that forked worker processes won't get
        # the sys.path that was current when the master process was spawned
        # meaning that new plugins will be picked up on regular app reload
        # rather than having to restart the master process as well.
        reload(site)
        self.pkg_resources_working_set = pkg_resources.WorkingSet()

        self.config = ConfigValueParser(global_conf)
        self.config.add_spec(self.spec)
        self.plugins = PluginLoader(self.pkg_resources_working_set,
                                    self.config.get("plugins", []))

        self.stats = Stats(self.config.get('statsd_addr'),
                           self.config.get('statsd_sample_rate'))
        self.startup_timer = self.stats.get_timer("app_startup")
        self.startup_timer.start()

        self.baseplate = Baseplate()
        self.baseplate.configure_logging()
        self.baseplate.register(R2BaseplateObserver())
#        self.baseplate.configure_tracing(
#            service_name="r2",
#            tracing_endpoint=self.config.get("tracing_endpoint"),
#            sample_rate=self.config.get("tracing_sample_rate"),
#        )

        self.paths = paths

        self.running_as_script = global_conf.get('running_as_script', False)
        
        # turn on for language support
        self.lang = getattr(self, 'site_lang', 'en')
        self.languages, self.lang_name = get_active_langs(
            config, default_lang=self.lang)

        all_languages = self.lang_name.keys()
        all_languages.sort()
        self.all_languages = all_languages
        
        # set default time zone if one is not set
        tz = global_conf.get('timezone', 'UTC')
        self.tz = pytz.timezone(tz)
        
        dtz = global_conf.get('display_timezone', tz)
        self.display_tz = pytz.timezone(dtz)

        self.startup_timer.intermediate("init")

    def __getattr__(self, name):
        if not name.startswith('_') and name in self.config:
            return self.config[name]
        else:
            raise AttributeError("g has no attr %r" % name)

    def setup(self):
        self.env = ''
        if (
            # handle direct invocation of "nosetests"
            "test" in sys.argv[0] or
            # handle "setup.py test" and all permutations thereof.
            "setup.py" in sys.argv[0] and "test" in sys.argv[1:]
        ):
            self.env = "unit_test"

        self.queues = queues.declare_queues(self)

        self.extension_subdomains = dict(
            simple="mobile",
            i="compact",
            api="api",
            rss="rss",
            xml="xml",
            json="json",
        )

        ################# PROVIDERS
        self.auth_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.auth",
            self.authentication_provider,
        )
        self.media_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.media",
            self.media_provider,
        )
        self.cdn_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.cdn",
            self.cdn_provider,
        )
        self.ticket_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.support",
            # TODO: fix this later, it refuses to pick up 
            # g.config['ticket_provider'] value, so hardcoding for now.
            # really, the next uncommented line should be:
            #self.ticket_provider,
            # instead of:
            "zendesk",
        )
        self.image_resizing_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.image_resizing",
            self.image_resizing_provider,
        )
        self.email_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.email",
            self.email_provider,
        )
        self.startup_timer.intermediate("providers")

        ################# CONFIGURATION
        # AMQP is required
        if not self.amqp_host:
            raise ValueError("amqp_host not set in the .ini")

        if not self.cassandra_seeds:
            raise ValueError("cassandra_seeds not set in the .ini")

        # heavy load mode is read only mode with a different infobar
        if self.heavy_load_mode:
            self.read_only_mode = True

        origin_prefix = self.domain_prefix + "." if self.domain_prefix else ""
        self.origin = self.default_scheme + "://" + origin_prefix + self.domain

        self.trusted_domains = set([self.domain])
        if self.https_endpoint:
            https_url = urlparse(self.https_endpoint)
            self.trusted_domains.add(https_url.hostname)

        # load the unique hashed names of files under static
        static_files = os.path.join(self.paths.get('static_files'), 'static')
        names_file_path = os.path.join(static_files, 'names.json')
        if os.path.exists(names_file_path):
            with open(names_file_path) as handle:
                self.static_names = json.load(handle)
        else:
            self.static_names = {}

        # make python warnings go through the logging system
        logging.captureWarnings(capture=True)

        log = logging.getLogger('reddit')

        # when we're a script (paster run) just set up super simple logging
        if self.running_as_script:
            log.setLevel(logging.INFO)
            log.addHandler(logging.StreamHandler())

        # if in debug mode, override the logging level to DEBUG
        if self.debug:
            log.setLevel(logging.DEBUG)

        # attempt to figure out which pool we're in and add that to the
        # LogRecords.
        try:
            with open("/etc/ec2_asg", "r") as f:
                pool = f.read().strip()
            # clean up the pool name since we're putting stuff after "-"
            pool = pool.partition("-")[0]
        except IOError:
            pool = "reddit-app"
        self.log = logging.LoggerAdapter(log, {"pool": pool})

        # set locations
        locations = pkg_resources.resource_stream(__name__,
                                                  "../data/locations.json")
        self.locations = json.loads(locations.read())

        if not self.media_domain:
            self.media_domain = self.domain
        if self.media_domain == self.domain:
            print >> sys.stderr, ("Warning: g.media_domain == g.domain. " +
                   "This may give untrusted content access to user cookies")
        if self.oauth_domain == self.domain:
            print >> sys.stderr, ("Warning: g.oauth_domain == g.domain. "
                    "CORS requests to g.domain will be allowed")

        for arg in sys.argv:
            tokens = arg.split("=")
            if len(tokens) == 2:
                k, v = tokens
                self.log.debug("Overriding g.%s to %s" % (k, v))
                setattr(self, k, v)

        self.reddit_host = socket.gethostname()
        self.reddit_pid  = os.getpid()

        if hasattr(signal, 'SIGUSR1'):
            # not all platforms have user signals
            signal.signal(signal.SIGUSR1, thread_dump)

        locale.setlocale(locale.LC_ALL, self.locale)

        # Pre-calculate ratelimit values
        self.RL_RESET_SECONDS = self.config["RL_RESET_MINUTES"] * 60
        self.RL_MAX_REQS = int(self.config["RL_AVG_REQ_PER_SEC"] *
                                      self.RL_RESET_SECONDS)

        self.RL_OAUTH_RESET_SECONDS = self.config["RL_OAUTH_RESET_MINUTES"] * 60
        self.RL_OAUTH_MAX_REQS = int(self.config["RL_OAUTH_AVG_REQ_PER_SEC"] *
                                     self.RL_OAUTH_RESET_SECONDS)

        self.RL_LOGIN_MAX_REQS = int(self.config["RL_LOGIN_AVG_PER_SEC"] *
                                     self.RL_RESET_SECONDS)
        self.RL_LOGIN_IP_MAX_REQS = int(self.config["RL_LOGIN_IP_AVG_PER_SEC"] *
                                        self.RL_RESET_SECONDS)
        self.RL_SHARE_MAX_REQS = int(self.config["RL_SHARE_AVG_PER_SEC"] *
                                     self.RL_RESET_SECONDS)

        # Compile ratelimit regexs
        user_agent_ratelimit_regexes = {}
        for agent_re, limit in self.user_agent_ratelimit_regexes.iteritems():
            user_agent_ratelimit_regexes[re.compile(agent_re)] = limit
        self.user_agent_ratelimit_regexes = user_agent_ratelimit_regexes

        self.startup_timer.intermediate("configuration")

        ################# ZOOKEEPER
        zk_hosts = self.config["zookeeper_connection_string"]
        zk_username = self.config["zookeeper_username"]
        zk_password = self.config["zookeeper_password"]
        self.zookeeper = connect_to_zookeeper(zk_hosts, (zk_username,
                                                         zk_password))

        self.throttles = IPNetworkLiveList(
            self.zookeeper,
            root="/throttles",
            reduced_data_node="/throttles_reduced",
        )

        parser = ConfigParser.RawConfigParser()
        parser.optionxform = str
        parser.read([self.config["__file__"]])

        if self.config["liveconfig_source"] == "zookeeper":
            self.live_config = LiveConfig(self.zookeeper, LIVE_CONFIG_NODE)
        else:
            self.live_config = extract_live_config(parser, self.plugins)

        if self.config["secrets_source"] == "zookeeper":
            self.secrets = fetch_secrets(self.zookeeper)
        else:
            self.secrets = extract_secrets(parser)

        ################# PRIVILEGED USERS
        self.admins = PermissionFilteredEmployeeList(
            self.live_config, type="admin")
        self.sponsors = PermissionFilteredEmployeeList(
            self.live_config, type="sponsor")
        self.employees = PermissionFilteredEmployeeList(
            self.live_config, type="employee")

        # Store which OAuth clients employees may use, the keys are just for
        # readability.
        self.employee_approved_clients = \
            self.live_config["employee_approved_clients"].values()

        self.startup_timer.intermediate("zookeeper")

        ################# MEMCACHE
        num_mc_clients = self.num_mc_clients

        # a smaller pool of caches used only for distributed locks.
        self.lock_cache = CMemcache(
            "lock",
            self.lockcaches,
            num_clients=num_mc_clients,
        )
        self.make_lock = make_lock_factory(self.lock_cache, self.stats)

        # memcaches used in front of the permacache CF in cassandra.
        # XXX: this is a legacy thing; permacache was made when C* didn't have
        # a row cache.
        permacache_memcaches = CMemcache(
            "perma",
            self.permacache_memcaches,
            min_compress_len=1400,
            num_clients=num_mc_clients,
        )

        # the stalecache is a memcached local to the current app server used
        # for data that's frequently fetched but doesn't need to be fresh.
        if self.stalecaches:
            stalecaches = CMemcache(
                "stale",
                self.stalecaches,
                num_clients=num_mc_clients,
            )
        else:
            stalecaches = None

        self.startup_timer.intermediate("memcache")

        ################# MCROUTER
        self.mcrouter = Mcrouter(
            "mcrouter",
            self.mcrouter_addr,
            min_compress_len=1400,
            num_clients=num_mc_clients,
        )

        ################# THRIFT-BASED SERVICES
        activity_endpoint = self.config.get("activity_endpoint")
        if activity_endpoint:
            # make ActivityInfo objects rendercache-key friendly
            # TODO: figure out a more general solution for this if
            # we need to do this for other thrift-generated objects
            ActivityInfo.cache_key = lambda self, style: repr(self)

            activity_pool = ThriftConnectionPool(activity_endpoint, timeout=0.1)
            self.baseplate.add_to_context("activity_service",
                ThriftContextFactory(activity_pool, ActivityService.Client))

        self.startup_timer.intermediate("thrift")

        ################# CASSANDRA
        keyspace = "reddit"
        self.cassandra_pools = {
            "main":
                StatsCollectingConnectionPool(
                    keyspace,
                    stats=self.stats,
                    logging_name="main",
                    server_list=self.cassandra_seeds,
                    pool_size=self.cassandra_pool_size,
                    timeout=4,
                    max_retries=3,
                    prefill=False
                ),
        }

        permacache_cf = Permacache._setup_column_family(
            'permacache',
            self.cassandra_pools[self.cassandra_default_pool],
        )

        self.startup_timer.intermediate("cassandra")

        ################# POSTGRES
        self.dbm = self.load_db_params()
        self.startup_timer.intermediate("postgres")

        ################# CHAINS
        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components
        cache_chains = {}
        localcache_cls = (SelfEmptyingCache if self.running_as_script
                          else LocalCache)

        if stalecaches:
            self.gencache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                self.mcrouter,
            )
        else:
            self.gencache = CacheChain((localcache_cls(), self.mcrouter))
        cache_chains.update(gencache=self.gencache)

        if stalecaches:
            self.thingcache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                self.mcrouter,
            )
        else:
            self.thingcache = CacheChain((localcache_cls(), self.mcrouter))
        cache_chains.update(thingcache=self.thingcache)

        if stalecaches:
            self.memoizecache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                self.mcrouter,
            )
        else:
            self.memoizecache = MemcacheChain(
                (localcache_cls(), self.mcrouter))
        cache_chains.update(memoizecache=self.memoizecache)

        if stalecaches:
            self.srmembercache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                self.mcrouter,
            )
        else:
            self.srmembercache = MemcacheChain(
                (localcache_cls(), self.mcrouter))
        cache_chains.update(srmembercache=self.srmembercache)

        if stalecaches:
            self.relcache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                self.mcrouter,
            )
        else:
            self.relcache = MemcacheChain(
                (localcache_cls(), self.mcrouter))
        cache_chains.update(relcache=self.relcache)

        self.ratelimitcache = MemcacheChain(
                (localcache_cls(), self.mcrouter))
        cache_chains.update(ratelimitcache=self.ratelimitcache)

        # rendercache holds rendered partial templates.
        self.rendercache = MemcacheChain((
            localcache_cls(),
            self.mcrouter,
        ))
        cache_chains.update(rendercache=self.rendercache)

        # commentpanecache holds fully rendered comment panes
        self.commentpanecache = MemcacheChain((
            localcache_cls(),
            self.mcrouter,
        ))
        cache_chains.update(commentpanecache=self.commentpanecache)

        # cassandra_local_cache is used for request-local caching in tdb_cassandra
        self.cassandra_local_cache = localcache_cls()
        cache_chains.update(cassandra_local_cache=self.cassandra_local_cache)

        if stalecaches:
            permacache_cache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                permacache_memcaches,
            )
        else:
            permacache_cache = CacheChain(
                (localcache_cls(), permacache_memcaches),
            )
        cache_chains.update(permacache=permacache_cache)

        self.permacache = Permacache(
            permacache_cache,
            permacache_cf,
            lock_factory=self.make_lock,
        )

        # hardcache is used for various things that tend to expire
        # TODO: replace hardcache w/ cassandra stuff
        self.hardcache = HardcacheChain(
            (localcache_cls(), HardCache(self)),
            cache_negative_results=True,
        )
        cache_chains.update(hardcache=self.hardcache)

        # I know this sucks, but we need non-request threads to be
        # able to reset the caches, so they have to close over
        # 'cache_chains' without being able to call getattr on 'g'.
        def reset_caches():
            for name, chain in cache_chains.iteritems():
                if isinstance(chain, TransitionalCache):
                    chain = chain.read_chain

                chain.reset()
                if isinstance(chain, LocalCache):
                    continue
                elif isinstance(chain, StaleCacheChain):
                    chain.stats = StaleCacheStats(self.stats, name)
                else:
                    chain.stats = CacheStats(self.stats, name)
        self.cache_chains = cache_chains

        self.reset_caches = reset_caches
        self.reset_caches()

        self.startup_timer.intermediate("cache_chains")

        # try to set the source control revision numbers
        self.versions = {}
        r2_root = os.path.dirname(os.path.dirname(self.paths["root"]))
        r2_gitdir = os.path.join(r2_root, ".git")
        self.short_version = self.record_repo_version("r2", r2_gitdir)

        if I18N_PATH:
            i18n_git_path = os.path.join(os.path.dirname(I18N_PATH), ".git")
            self.record_repo_version("i18n", i18n_git_path)

        # Initialize the amqp module globals, start the worker, etc.
        r2.lib.amqp.initialize(self)

        self.events = EventQueue()

        self.startup_timer.intermediate("revisions")

    def setup_complete(self):
        self.startup_timer.stop()
        self.stats.flush()

        if self.log_start:
            self.log.error(
                "%s:%s started %s at %s (took %.02fs)",
                self.reddit_host,
                self.reddit_pid,
                self.short_version,
                datetime.now().strftime("%H:%M:%S"),
                self.startup_timer.elapsed_seconds()
            )

        if einhorn.is_worker():
            einhorn.ack_startup()

    def record_repo_version(self, repo_name, git_dir):
        """Get the currently checked out git revision for a given repository,
        record it in g.versions, and return the short version of the hash."""
        try:
            subprocess.check_output
        except AttributeError:
            # python 2.6 compat
            pass
        else:
            try:
                revision = subprocess.check_output(["git",
                                                    "--git-dir", git_dir,
                                                    "rev-parse", "HEAD"])
            except subprocess.CalledProcessError as e:
                self.log.warning("Unable to fetch git revision: %r", e)
            else:
                # the source is truncated here; this completion is
                # inferred from the docstring: record the revision in
                # g.versions and return the short hash
                revision = revision.strip()
                self.versions[repo_name] = revision
                return revision[:7]
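
The reset_caches closure built earlier in this example exists so that
non-request threads can flush local caches without reaching through 'g'.
A minimal, self-contained sketch of that closure pattern (every name
below is illustrative, not from r2):

import threading

def make_caches():
    # stand-ins for the cache chains built in the example above
    caches = {"gencache": {}, "rendercache": {}}

    def reset_caches():
        # closes over 'caches'; callers never need the owning object
        for cache in caches.values():
            cache.clear()

    return caches, reset_caches

caches, reset_caches = make_caches()
caches["gencache"]["comment_tree:1"] = "stale"

# any thread can reset the caches without access to the app globals
t = threading.Thread(target=reset_caches)
t.start()
t.join()
assert not caches["gencache"]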
Example No. 24
0
def main():
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("config_file",
                            type=argparse.FileType("r"),
                            help="path to a configuration file")
    arg_parser.add_argument("--debug",
                            default=False,
                            action="store_true",
                            help="enable debug logging")
    arg_parser.add_argument(
        "--once",
        default=False,
        action="store_true",
        help="only run the fetcher once rather than as a daemon",
    )
    args = arg_parser.parse_args()

    if args.debug:
        level = logging.DEBUG
    else:
        level = logging.INFO

    logging.basicConfig(format="%(asctime)s:%(levelname)s:%(message)s",
                        level=level)
    parser = configparser.RawConfigParser()
    parser.readfp(args.config_file)  # pylint: disable=deprecated-method
    fetcher_config = dict(parser.items("secret-fetcher"))

    cfg = config.parse_config(
        fetcher_config,
        {
            "vault": {
                "url":
                config.String,
                "role":
                config.String,
                "auth_type":
                config.Optional(
                    config.OneOf(**VaultClientFactory.auth_types()),
                    default=VaultClientFactory.auth_types()["aws"],
                ),
                "mount_point":
                config.Optional(config.String, default="aws-ec2"),
            },
            "output": {
                "path":
                config.Optional(config.String,
                                default="/var/local/secrets.json"),
                "owner":
                config.Optional(config.UnixUser, default=0),
                "group":
                config.Optional(config.UnixGroup, default=0),
                "mode":
                config.Optional(config.Integer(base=8), default=0o400),
            },
            "secrets": config.Optional(config.TupleOf(config.String),
                                       default=[]),
        },
    )

    # pylint: disable=maybe-no-member
    client_factory = VaultClientFactory(cfg.vault.url, cfg.vault.role,
                                        cfg.vault.auth_type,
                                        cfg.vault.mount_point)

    if args.once:
        logger.info("Running secret fetcher once")
        fetch_secrets(cfg, client_factory)
    else:
        logger.info("Running secret fetcher as a daemon")
        while True:
            soonest_expiration = fetch_secrets(cfg, client_factory)
            time_til_expiration = soonest_expiration - datetime.datetime.utcnow()
            time_to_sleep = time_til_expiration - VAULT_TOKEN_PREFETCH_TIME
            time.sleep(max(int(time_to_sleep.total_seconds()), 1))
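
For reference, a sketch of the dotted keys this spec expects once the
[secret-fetcher] INI section has been flattened into a dict; every value
here is illustrative, not a real deployment default:

fetcher_config = {
    "vault.url": "http://localhost:8200",
    "vault.role": "my-server-role",
    "output.path": "/var/local/secrets.json",
    "secrets": "secret/service/db-credentials, secret/service/api-key",
}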
Example No. 25
0
def publish_events():
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("config_file",
                            type=argparse.FileType("r"),
                            help="path to a configuration file")
    arg_parser.add_argument(
        "--queue-name",
        default="main",
        help="name of event queue / publisher config (default: main)",
    )
    arg_parser.add_argument("--debug",
                            default=False,
                            action="store_true",
                            help="enable debug logging")
    args = arg_parser.parse_args()

    if args.debug:
        level = logging.DEBUG
    else:
        level = logging.WARNING
    logging.basicConfig(level=level)

    config_parser = configparser.RawConfigParser()
    config_parser.readfp(args.config_file)  # pylint: disable=deprecated-method
    raw_config = dict(config_parser.items("event-publisher:" +
                                          args.queue_name))
    cfg = config.parse_config(
        raw_config,
        {
            "collector": {
                "hostname": config.String,
                "version": config.Optional(config.Integer, default=1),
            },
            "key": {
                "name": config.String,
                "secret": config.Base64
            },
        },
    )

    metrics_client = metrics_client_from_config(raw_config)

    event_queue = MessageQueue("/events-" + args.queue_name,
                               max_messages=MAX_QUEUE_SIZE,
                               max_message_size=MAX_EVENT_SIZE)

    # pylint: disable=maybe-no-member
    serializer = SERIALIZER_BY_VERSION[cfg.collector.version]()
    batcher = TimeLimitedBatch(serializer, MAX_BATCH_AGE)
    publisher = BatchPublisher(metrics_client, cfg)

    while True:
        try:
            message = event_queue.get(timeout=0.2)
        except TimedOutError:
            message = None

        try:
            batcher.add(message)
        except BatchFull:
            serialized = batcher.serialize()
            publisher.publish(serialized)
            batcher.reset()
            batcher.add(message)
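
A sketch of the raw_config this parser consumes, flattened from an
[event-publisher:main] INI section; the values are made up:

raw_config = {
    "collector.hostname": "events.example.com",
    "collector.version": "1",
    "key.name": "MySigningKey",
    "key.secret": "aHVudGVyMg==",  # config.Base64 decodes this ("hunter2")
}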
Example No. 26
0
def error_reporter_from_config(raw_config: config.RawConfig,
                               module_name: str) -> "raven.Client":
    """Configure and return a error reporter.

    This expects one configuration option and can take many optional ones:

    ``sentry.dsn``
        The DSN provided by Sentry. If blank, the reporter will discard events.
    ``sentry.site`` (optional)
        An arbitrary string to identify this client installation.
    ``sentry.environment`` (optional)
        The environment your application is running in.
    ``sentry.exclude_paths`` (optional)
        Comma-delimited list of module prefixes to ignore when discovering
        where an error came from.
    ``sentry.include_paths`` (optional)
        Comma-delimited list of paths to include for consideration when
        drilling down to an exception.
    ``sentry.ignore_exceptions`` (optional)
        Comma-delimited list of fully qualified names of exception classes
        (potentially with * globs) to not report.
    ``sentry.sample_rate`` (optional)
        Percentage of errors to report (e.g. "37%").
    ``sentry.processors`` (optional)
        Comma-delimited list of fully qualified names of processor classes
        to apply to events before sending to Sentry.

    Example usage::

        error_reporter_from_config(app_config, __name__)

    :param dict raw_config: The application configuration which should have
        settings for the error reporter.
    :param str module_name: ``__name__`` of the root module of the application.
    :rtype: :py:class:`raven.Client`

    """
    import raven  # pylint: disable=redefined-outer-name

    cfg = config.parse_config(
        raw_config,
        {
            "sentry": {
                "dsn":
                config.Optional(config.String, default=None),
                "site":
                config.Optional(config.String, default=None),
                "environment":
                config.Optional(config.String, default=None),
                "include_paths":
                config.Optional(config.String, default=None),
                "exclude_paths":
                config.Optional(config.String, default=None),
                "ignore_exceptions":
                config.Optional(config.TupleOf(config.String), default=[]),
                "sample_rate":
                config.Optional(config.Percent, default=1),
                "processors":
                config.Optional(
                    config.TupleOf(config.String),
                    default=["raven.processors.SanitizePasswordsProcessor"],
                ),
            }
        },
    )

    application_module = sys.modules[module_name]
    directory = os.path.dirname(application_module.__file__)
    release = None
    while directory != "/":
        try:
            release = raven.fetch_git_sha(directory)
        except raven.exceptions.InvalidGitRepository:
            directory = os.path.dirname(directory)
        else:
            break

    # pylint: disable=maybe-no-member
    return raven.Client(
        dsn=cfg.sentry.dsn,
        site=cfg.sentry.site,
        release=release,
        environment=cfg.sentry.environment,
        include_paths=cfg.sentry.include_paths,
        exclude_paths=cfg.sentry.exclude_paths,
        ignore_exceptions=cfg.sentry.ignore_exceptions,
        sample_rate=cfg.sentry.sample_rate,
        processors=cfg.sentry.processors,
    )
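
A hypothetical usage sketch; only sentry.dsn is needed to enable
reporting, and captureException is standard raven.Client API. The DSN
and values below are made up:

app_config = {
    "sentry.dsn": "https://somekey@sentry.example.com/1",
    "sentry.environment": "staging",
    "sentry.sample_rate": "50%",
}
reporter = error_reporter_from_config(app_config, __name__)

try:
    1 / 0
except ZeroDivisionError:
    reporter.captureException()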
Example No. 27
0
def tracing_client_from_config(
        raw_config: config.RawConfig,
        log_if_unconfigured: bool = True) -> tracing.TracingClient:
    """Configure and return a tracing client.

    This expects one configuration option and can take many optional ones:

    ``tracing.service_name``
        The name for the service this observer is registered to.
    ``tracing.endpoint`` (optional)
        (Deprecated in favor of the sidecar model.) Destination to record span data.
    ``tracing.queue_name`` (optional)
        Name of the POSIX queue where spans are recorded.
    ``tracing.max_span_queue_size`` (optional)
        Span processing queue limit.
    ``tracing.num_span_workers`` (optional)
        Number of worker threads for span processing.
    ``tracing.span_batch_interval`` (optional)
        Wait time for span processing in seconds.
    ``tracing.num_conns`` (optional)
        Pool size for remote recorder connection pool.
    ``tracing.sample_rate`` (optional)
        Percentage of unsampled requests to record traces for (e.g. "37%").

    :param dict raw_config: The application configuration which should have
        settings for the tracing client.
    :param bool log_if_unconfigured: When the client is not configured, should
        trace spans be logged or discarded silently?
    :return: A configured client.
    :rtype: :py:class:`baseplate.diagnostics.tracing.TracingClient`

    """
    cfg = config.parse_config(
        raw_config,
        {
            "tracing": {
                "service_name":
                config.String,
                "endpoint":
                config.Optional(config.Endpoint),
                "queue_name":
                config.Optional(config.String),
                "max_span_queue_size":
                config.Optional(config.Integer, default=50000),
                "num_span_workers":
                config.Optional(config.Integer, default=5),
                "span_batch_interval":
                config.Optional(config.Timespan,
                                default=config.Timespan("500 milliseconds")),
                "num_conns":
                config.Optional(config.Integer, default=100),
                "sample_rate":
                config.Optional(config.Fallback(config.Percent, config.Float),
                                default=0.1),
            }
        },
    )

    # pylint: disable=maybe-no-member
    return tracing.make_client(
        service_name=cfg.tracing.service_name,
        tracing_endpoint=cfg.tracing.endpoint,
        tracing_queue_name=cfg.tracing.queue_name,
        max_span_queue_size=cfg.tracing.max_span_queue_size,
        num_span_workers=cfg.tracing.num_span_workers,
        span_batch_interval=cfg.tracing.span_batch_interval.total_seconds(),
        num_conns=cfg.tracing.num_conns,
        sample_rate=cfg.tracing.sample_rate,
        log_if_unconfigured=log_if_unconfigured,
    )
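
An illustrative minimal configuration; service_name is the only
required key, and everything else falls back to the defaults above:

app_config = {
    "tracing.service_name": "my_service",
    "tracing.sample_rate": "10%",
}
client = tracing_client_from_config(app_config, log_if_unconfigured=False)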
Example No. 28
0
def test_optional_exists(self):
    parser = config.Optional(config.Integer)
    self.assertEqual(parser("33"), 33)
Example No. 29
0
def test_optional_invalid(self):
    parser = config.Optional(config.Integer)
    with self.assertRaises(ValueError):
        parser("asdf")
Example No. 30
0
def test_optional_default(self):
    parser = config.Optional(config.Integer)
    self.assertEqual(parser(""), None)
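
A natural companion case, assuming the default keyword shown in the
earlier examples (this test is inferred, not from the original suite):

def test_optional_custom_default(self):
    parser = config.Optional(config.Integer, default=7)
    self.assertEqual(parser(""), 7)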