Esempio n. 1
0
    def __init__(self, global_conf, app_conf, paths, **extra):
        """
        Create the container for application-wide objects.

        Pylons builds one Globals instance while the application is
        initializing; request code reaches it through the 'g' variable.

        ``global_conf``
            The ``[DEFAULT]`` section of the configuration file, i.e. the
            same variable passed around in ``config/middleware.py``. A
            ``debug`` key defaulting to False is added when absent.

        ``app_conf``
            The ``kw`` dictionary from ``config/middleware.py`` holding
            the application's own section of the config file. Not read
            by this method.

        ``paths``
            Stored unchanged on ``self.paths``.

        ``extra``
            The configuration returned from ``load_config`` in
            ``config/middleware.py``. Not read by this method.

        """

        global_conf.setdefault("debug", False)

        # the parsed config has to exist before anything below asks for
        # configured values
        self.config = ConfigValueParser(global_conf)
        self.config.add_spec(self.spec)
        plugin_names = self.config.get("plugins", [])
        self.plugins = PluginLoader(plugin_names)

        # stats come up early so that the rest of startup can be timed
        statsd_addr = self.config.get('statsd_addr')
        statsd_sample_rate = self.config.get('statsd_sample_rate')
        self.stats = Stats(statsd_addr, statsd_sample_rate)
        self.startup_timer = self.stats.get_timer("app_startup")
        self.startup_timer.start()

        self.paths = paths
        self.running_as_script = global_conf.get('running_as_script', False)

        # language support: 'en' is used when no site_lang attribute exists
        self.lang = getattr(self, 'site_lang', 'en')
        active_langs = get_active_langs(default_lang=self.lang)
        self.languages, self.lang_name = active_langs
        self.all_languages = sorted(self.lang_name.keys())

        # time zones: UTC unless configured; the display zone falls back
        # to whatever the main zone is
        tz_name = global_conf.get('timezone', 'UTC')
        self.tz = pytz.timezone(tz_name)
        display_tz_name = global_conf.get('display_timezone', tz_name)
        self.display_tz = pytz.timezone(display_tz_name)

        self.startup_timer.intermediate("init")
Esempio n. 2
0
    def __init__(self, global_conf, app_conf, paths, **extra):
        """
        Create the container for application-wide objects.

        Pylons builds one Globals instance while the application is
        initializing; request code reaches it through the 'g' variable.

        ``global_conf``
            The ``[DEFAULT]`` section of the configuration file, i.e. the
            same variable passed around in ``config/middleware.py``. A
            ``debug`` key defaulting to False is added when absent.

        ``app_conf``
            The ``kw`` dictionary from ``config/middleware.py`` holding
            the application's own section of the config file. Not read
            by this method.

        ``paths``
            Stored unchanged on ``self.paths``.

        ``extra``
            The configuration returned from ``load_config`` in
            ``config/middleware.py``. Not read by this method.

        """

        global_conf.setdefault("debug", False)

        # site is reloaded first so the working set below is built from a
        # fresh sys.path. forked workers would otherwise keep the sys.path
        # the master process had when it was spawned; this way a regular
        # app reload is enough to pick up new plugins, without restarting
        # the master as well.
        reload(site)
        working_set = pkg_resources.WorkingSet()
        self.pkg_resources_working_set = working_set

        # the parsed config has to exist before anything below asks for
        # configured values
        self.config = ConfigValueParser(global_conf)
        self.config.add_spec(self.spec)
        plugin_names = self.config.get("plugins", [])
        self.plugins = PluginLoader(self.pkg_resources_working_set,
                                    plugin_names)

        # stats come up early so that the rest of startup can be timed
        statsd_addr = self.config.get('statsd_addr')
        statsd_sample_rate = self.config.get('statsd_sample_rate')
        self.stats = Stats(statsd_addr, statsd_sample_rate)
        self.startup_timer = self.stats.get_timer("app_startup")
        self.startup_timer.start()

        self.paths = paths
        self.running_as_script = global_conf.get('running_as_script', False)

        # language support: 'en' is used when no site_lang attribute exists
        self.lang = getattr(self, 'site_lang', 'en')
        active_langs = get_active_langs(default_lang=self.lang)
        self.languages, self.lang_name = active_langs
        self.all_languages = sorted(self.lang_name.keys())

        # time zones: UTC unless configured; the display zone falls back
        # to whatever the main zone is
        tz_name = global_conf.get('timezone', 'UTC')
        self.tz = pytz.timezone(tz_name)
        display_tz_name = global_conf.get('display_timezone', tz_name)
        self.display_tz = pytz.timezone(display_tz_name)

        self.startup_timer.intermediate("init")
Esempio n. 3
0
    def setup(self):
        """
        Finish initializing the globals once the config values are usable.

        In order, this installs a SIGUSR1 handler where the platform has
        one, picks the live-config source (zookeeper or the ini file),
        builds the memcache and cassandra clients and the cache chains,
        loads the database parameters, derives the origin and the
        secure/trusted domain sets, loads the static-file name map,
        configures logging, applies ``key=value`` overrides found in
        ``sys.argv`` and records the source control versions.

        Raises ``ValueError`` when ``cassandra_seeds`` is empty and
        ``Exception`` when ``write_query_queue`` is enabled without
        ``amqp_host`` or without ``use_query_cache``.
        """
        # heavy load mode is read only mode with a different infobar
        if self.heavy_load_mode:
            self.read_only_mode = True

        if hasattr(signal, 'SIGUSR1'):
            # not all platforms have user signals
            signal.signal(signal.SIGUSR1, thread_dump)

        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components

        # scripts get a SelfEmptyingCache as their local cache layer,
        # everything else gets a LocalCache
        localcache_cls = (SelfEmptyingCache
                          if self.running_as_script else LocalCache)
        num_mc_clients = self.num_mc_clients

        self.cache_chains = {}

        # for now, zookeeper will be an optional part of the stack.
        # if it's not configured, we will grab the expected config from the
        # [live_config] section of the ini file
        zk_hosts = self.config.get("zookeeper_connection_string")
        if zk_hosts:
            # imported here so the module is only needed when zookeeper
            # is actually configured
            from r2.lib.zookeeper import (connect_to_zookeeper, LiveConfig,
                                          LiveList)
            zk_username = self.config["zookeeper_username"]
            zk_password = self.config["zookeeper_password"]
            self.zookeeper = connect_to_zookeeper(zk_hosts,
                                                  (zk_username, zk_password))
            self.live_config = LiveConfig(self.zookeeper, LIVE_CONFIG_NODE)
            self.throttles = LiveList(self.zookeeper,
                                      "/throttles",
                                      map_fn=ipaddress.ip_network,
                                      reduce_fn=ipaddress.collapse_addresses)
        else:
            self.zookeeper = None
            # re-read the ini file the app was configured from
            parser = ConfigParser.RawConfigParser()
            parser.read([self.config["__file__"]])
            self.live_config = extract_live_config(parser, self.plugins)
            self.throttles = tuple()  # immutable since it's not real

        self.memcache = CMemcache(self.memcaches, num_clients=num_mc_clients)
        self.lock_cache = CMemcache(self.lockcaches,
                                    num_clients=num_mc_clients)

        # (re)binds self.stats; everything below in this method uses
        # this instance
        self.stats = Stats(self.config.get('statsd_addr'),
                           self.config.get('statsd_sample_rate'))

        # have the stats object observe cursor execution on engine.Engine,
        # both before and after each execute
        event.listens_for(engine.Engine, 'before_cursor_execute')(
            self.stats.pg_before_cursor_execute)
        event.listens_for(engine.Engine, 'after_cursor_execute')(
            self.stats.pg_after_cursor_execute)

        self.make_lock = make_lock_factory(self.lock_cache, self.stats)

        if not self.cassandra_seeds:
            raise ValueError("cassandra_seeds not set in the .ini")

        keyspace = "reddit"
        self.cassandra_pools = {
            "main":
            StatsCollectingConnectionPool(keyspace,
                                          stats=self.stats,
                                          logging_name="main",
                                          server_list=self.cassandra_seeds,
                                          pool_size=self.cassandra_pool_size,
                                          timeout=2,
                                          max_retries=3,
                                          prefill=False),
        }

        # the permacache only gets a memcache layer when
        # permacache_memcaches is configured
        perma_memcache = (CMemcache(self.permacache_memcaches,
                                    num_clients=num_mc_clients)
                          if self.permacache_memcaches else None)
        self.permacache = CassandraCacheChain(
            localcache_cls(),
            CassandraCache('permacache',
                           self.cassandra_pools[self.cassandra_default_pool],
                           read_consistency_level=self.cassandra_rcl,
                           write_consistency_level=self.cassandra_wcl),
            memcache=perma_memcache,
            lock_factory=self.make_lock)

        self.cache_chains.update(permacache=self.permacache)

        # hardcache is done after the db info is loaded, and then the
        # chains are reset to use the appropriate initial entries

        if self.stalecaches:
            self.cache = StaleCacheChain(
                localcache_cls(),
                CMemcache(self.stalecaches, num_clients=num_mc_clients),
                self.memcache)
        else:
            self.cache = MemcacheChain((localcache_cls(), self.memcache))
        self.cache_chains.update(cache=self.cache)

        self.rendercache = MemcacheChain(
            (localcache_cls(),
             CMemcache(self.rendercaches,
                       noreply=True,
                       no_block=True,
                       num_clients=num_mc_clients)))
        self.cache_chains.update(rendercache=self.rendercache)

        # thing_cache is a chain with only a local cache in it
        self.thing_cache = CacheChain((localcache_cls(), ))
        self.cache_chains.update(thing_cache=self.thing_cache)

        #load the database info
        self.dbm = self.load_db_params()

        # can't do this until load_db_params() has been called
        self.hardcache = HardcacheChain(
            (localcache_cls(), self.memcache, HardCache(self)),
            cache_negative_results=True)
        self.cache_chains.update(hardcache=self.hardcache)

        # I know this sucks, but we need non-request-threads to be
        # able to reset the caches, so we need them be able to close
        # around 'cache_chains' without being able to call getattr on
        # 'g'
        cache_chains = self.cache_chains.copy()

        def reset_caches():
            # reset every chain and give it a fresh CacheStats named
            # after its key in cache_chains
            for name, chain in cache_chains.iteritems():
                chain.reset()
                chain.stats = CacheStats(self.stats, name)

        self.reset_caches = reset_caches
        self.reset_caches()

        # set the modwindow. the configured value is passed as timedelta's
        # first positional argument, i.e. a number of days
        self.MODWINDOW = timedelta(self.MODWINDOW)

        self.REDDIT_MAIN = bool(os.environ.get('REDDIT_MAIN'))

        # the conditional expression binds loosest: domain_prefix + "."
        # when a prefix is set, otherwise the empty string
        origin_prefix = self.domain_prefix + "." if self.domain_prefix else ""
        self.origin = "http://" + origin_prefix + self.domain
        self.secure_domains = set([urlparse(self.payment_domain).netloc])

        self.trusted_domains = set([self.domain])
        self.trusted_domains.update(self.authorized_cnames)
        if self.https_endpoint:
            # netloc goes to the secure set, hostname to the trusted set
            https_url = urlparse(self.https_endpoint)
            self.secure_domains.add(https_url.netloc)
            self.trusted_domains.add(https_url.hostname)
        if getattr(self, 'oauth_domain', None):
            self.secure_domains.add(self.oauth_domain)

        # load the unique hashed names of files under static
        static_files = os.path.join(self.paths.get('static_files'), 'static')
        names_file_path = os.path.join(static_files, 'names.json')
        if os.path.exists(names_file_path):
            with open(names_file_path) as handle:
                self.static_names = json.load(handle)
        else:
            # no names file: fall back to an empty mapping
            self.static_names = {}

        #setup the logger
        self.log = logging.getLogger('reddit')
        self.log.addHandler(logging.StreamHandler())
        if self.debug:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.INFO)

        # set log level for pycountry which is chatty
        logging.getLogger('pycountry.db').setLevel(logging.CRITICAL)

        if not self.media_domain:
            self.media_domain = self.domain
        if self.media_domain == self.domain:
            print("Warning: g.media_domain == g.domain. " +
                  "This may give untrusted content access to user cookies")

        self.reddit_host = socket.gethostname()
        self.reddit_pid = os.getpid()

        # any command-line argument containing exactly one "=" is applied
        # as an attribute override; the value is stored as the raw string
        for arg in sys.argv:
            tokens = arg.split("=")
            if len(tokens) == 2:
                k, v = tokens
                self.log.debug("Overriding g.%s to %s" % (k, v))
                setattr(self, k, v)

        #if we're going to use the query_queue, we need amqp
        if self.write_query_queue and not self.amqp_host:
            raise Exception("amqp_host must be defined to use the query queue")

        # This requirement doesn't *have* to be a requirement, but there are
        # bugs at the moment that will pop up if you violate it
        if self.write_query_queue and not self.use_query_cache:
            raise Exception("write_query_queue requires use_query_cache")

        # try to set the source control revision numbers
        self.versions = {}
        # the r2 checkout's .git is looked for two directory levels
        # above paths["root"]
        r2_root = os.path.dirname(os.path.dirname(self.paths["root"]))
        r2_gitdir = os.path.join(r2_root, ".git")
        self.short_version = self.record_repo_version("r2", r2_gitdir)

        if I18N_PATH:
            i18n_git_path = os.path.join(os.path.dirname(I18N_PATH), ".git")
            self.record_repo_version("i18n", i18n_git_path)

        if self.log_start:
            # NOTE(review): logged at error level, presumably so the
            # startup line shows up whatever log level is configured --
            # confirm
            self.log.error("reddit app %s:%s started %s at %s" %
                           (self.reddit_host, self.reddit_pid,
                            self.short_version, datetime.now()))
Esempio n. 4
0
    def setup(self, global_conf):
        """
        Finish initializing the globals from the parsed configuration.

        In order, this installs a SIGUSR1 handler where the platform has
        one, builds the memcache and cassandra clients and the cache
        chains, loads the database parameters, derives the origin and
        the secure/trusted domain sets, loads the static-file name map,
        configures logging, applies ``key=value`` overrides found in
        ``sys.argv`` and makes a best-effort attempt to record the git
        revision.

        ``global_conf``
            Mapping of configuration values. Read here for the
            ``statsd_addr`` / ``statsd_sample_rate`` settings and handed
            on to ``load_db_params``.

        Raises ``ValueError`` when ``cassandra_seeds`` is empty and
        ``Exception`` when ``write_query_queue`` is enabled without
        ``amqp_host`` or without ``use_query_cache``.
        """
        # heavy load mode is read only mode with a different infobar
        if self.heavy_load_mode:
            self.read_only_mode = True

        if hasattr(signal, 'SIGUSR1'):
            # not all platforms have user signals
            signal.signal(signal.SIGUSR1, thread_dump)

        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components

        # scripts get a SelfEmptyingCache as their local cache layer,
        # everything else gets a LocalCache
        localcache_cls = (SelfEmptyingCache
                          if self.running_as_script else LocalCache)
        num_mc_clients = self.num_mc_clients

        self.cache_chains = {}

        self.memcache = CMemcache(self.memcaches, num_clients=num_mc_clients)
        self.make_lock = make_lock_factory(self.memcache)

        self.stats = Stats(global_conf.get('statsd_addr'),
                           global_conf.get('statsd_sample_rate'))

        if not self.cassandra_seeds:
            raise ValueError("cassandra_seeds not set in the .ini")

        # two pools over the same seeds: "main" retries with a 2 second
        # timeout, "noretries" gives up after a single 0.1 second attempt
        keyspace = "reddit"
        self.cassandra_pools = {
            "main":
            StatsCollectingConnectionPool(keyspace,
                                          stats=self.stats,
                                          logging_name="main",
                                          server_list=self.cassandra_seeds,
                                          pool_size=len(self.cassandra_seeds),
                                          timeout=2,
                                          max_retries=3,
                                          prefill=False),
            "noretries":
            StatsCollectingConnectionPool(keyspace,
                                          stats=self.stats,
                                          logging_name="noretries",
                                          server_list=self.cassandra_seeds,
                                          pool_size=len(self.cassandra_seeds),
                                          timeout=.1,
                                          max_retries=0,
                                          prefill=False),
        }

        # the permacache only gets a memcache layer when
        # permacache_memcaches is configured
        perma_memcache = (CMemcache(self.permacache_memcaches,
                                    num_clients=num_mc_clients)
                          if self.permacache_memcaches else None)
        self.permacache = CassandraCacheChain(
            localcache_cls(),
            CassandraCache('permacache',
                           self.cassandra_pools[self.cassandra_default_pool],
                           read_consistency_level=self.cassandra_rcl,
                           write_consistency_level=self.cassandra_wcl),
            memcache=perma_memcache,
            lock_factory=self.make_lock)

        self.cache_chains.update(permacache=self.permacache)

        # hardcache is done after the db info is loaded, and then the
        # chains are reset to use the appropriate initial entries

        if self.stalecaches:
            self.cache = StaleCacheChain(
                localcache_cls(),
                CMemcache(self.stalecaches, num_clients=num_mc_clients),
                self.memcache)
        else:
            self.cache = MemcacheChain((localcache_cls(), self.memcache))
        self.cache_chains.update(cache=self.cache)

        self.rendercache = MemcacheChain(
            (localcache_cls(),
             CMemcache(self.rendercaches,
                       noreply=True,
                       no_block=True,
                       num_clients=num_mc_clients)))
        self.cache_chains.update(rendercache=self.rendercache)

        self.servicecache = MemcacheChain(
            (localcache_cls(),
             CMemcache(self.servicecaches, num_clients=num_mc_clients)))
        self.cache_chains.update(servicecache=self.servicecache)

        # thing_cache is a chain with only a local cache in it
        self.thing_cache = CacheChain((localcache_cls(), ))
        self.cache_chains.update(thing_cache=self.thing_cache)

        #load the database info
        self.dbm = self.load_db_params(global_conf)

        # can't do this until load_db_params() has been called
        self.hardcache = HardcacheChain(
            (localcache_cls(), self.memcache, HardCache(self)),
            cache_negative_results=True)
        self.cache_chains.update(hardcache=self.hardcache)

        # I know this sucks, but we need non-request-threads to be
        # able to reset the caches, so we need them be able to close
        # around 'cache_chains' without being able to call getattr on
        # 'g'
        cache_chains = self.cache_chains.copy()

        def reset_caches():
            # reset every chain and give it a fresh CacheStats named
            # after its key in cache_chains
            for name, chain in cache_chains.iteritems():
                chain.reset()
                chain.stats = CacheStats(self.stats, name)

        self.reset_caches = reset_caches
        self.reset_caches()

        #make a query cache
        self.stats_collector = QueryStats()

        # set the modwindow. the configured value is passed as timedelta's
        # first positional argument, i.e. a number of days
        self.MODWINDOW = timedelta(self.MODWINDOW)

        self.REDDIT_MAIN = bool(os.environ.get('REDDIT_MAIN'))

        # the conditional expression binds loosest: domain_prefix + "."
        # when a prefix is set, otherwise the empty string
        origin_prefix = self.domain_prefix + "." if self.domain_prefix else ""
        self.origin = "http://" + origin_prefix + self.domain
        self.secure_domains = set([urlparse(self.payment_domain).netloc])

        self.trusted_domains = set([self.domain])
        self.trusted_domains.update(self.authorized_cnames)
        if self.https_endpoint:
            # netloc goes to the secure set, hostname to the trusted set
            https_url = urlparse(self.https_endpoint)
            self.secure_domains.add(https_url.netloc)
            self.trusted_domains.add(https_url.hostname)

        # load the unique hashed names of files under static
        static_files = os.path.join(self.paths.get('static_files'), 'static')
        names_file_path = os.path.join(static_files, 'names.json')
        if os.path.exists(names_file_path):
            with open(names_file_path) as handle:
                self.static_names = json.load(handle)
        else:
            # no names file: fall back to an empty mapping
            self.static_names = {}

        #setup the logger
        self.log = logging.getLogger('reddit')
        self.log.addHandler(logging.StreamHandler())
        if self.debug:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.INFO)

        # set log level for pycountry which is chatty
        logging.getLogger('pycountry.db').setLevel(logging.CRITICAL)

        if not self.media_domain:
            self.media_domain = self.domain
        if self.media_domain == self.domain:
            print("Warning: g.media_domain == g.domain. " +
                  "This may give untrusted content access to user cookies")

        self.reddit_host = socket.gethostname()
        self.reddit_pid = os.getpid()

        # any command-line argument containing exactly one "=" is applied
        # as an attribute override; the value is stored as the raw string
        for arg in sys.argv:
            tokens = arg.split("=")
            if len(tokens) == 2:
                k, v = tokens
                self.log.debug("Overriding g.%s to %s" % (k, v))
                setattr(self, k, v)

        #the shutdown toggle
        self.shutdown = False

        #if we're going to use the query_queue, we need amqp
        if self.write_query_queue and not self.amqp_host:
            raise Exception("amqp_host must be defined to use the query queue")

        # This requirement doesn't *have* to be a requirement, but there are
        # bugs at the moment that will pop up if you violate it
        if self.write_query_queue and not self.use_query_cache:
            raise Exception("write_query_queue requires use_query_cache")

        # try to set the source control revision number. this is best
        # effort: neither a failing git command (CalledProcessError) nor a
        # git executable that can't be launched at all (OSError) may stop
        # the app from starting.
        # NOTE(review): within the lines visible here short_version is only
        # assigned on the failure path -- confirm the success path sets it
        try:
            self.version = subprocess.check_output(
                ["git", "rev-parse", "HEAD"])
        except (subprocess.CalledProcessError, OSError) as e:
            self.log.info("Couldn't read source revision (%r)" % e)
            self.version = self.short_version = '(unknown)'