コード例 #1
0
ファイル: app_globals.py プロジェクト: alexpaulzor/reddit
    def setup(self, global_conf):
        # heavy load mode is read only mode with a different infobar
        if self.heavy_load_mode:
            self.read_only_mode = True

        if hasattr(signal, 'SIGUSR1'):
            # not all platforms have user signals
            signal.signal(signal.SIGUSR1, thread_dump)

        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components

        localcache_cls = (SelfEmptyingCache if self.running_as_script
                          else LocalCache)
        num_mc_clients = self.num_mc_clients

        self.cache_chains = []

        self.memcache = CMemcache(self.memcaches, num_clients = num_mc_clients)
        self.make_lock = make_lock_factory(self.memcache)

        if not self.cassandra_seeds:
            raise ValueError("cassandra_seeds not set in the .ini")
        self.cassandra = PycassaConnectionPool('reddit',
                                               server_list = self.cassandra_seeds,
                                               pool_size = len(self.cassandra_seeds),
                                               # TODO: .ini setting
                                               timeout=15, max_retries=3,
                                               prefill=False)
        perma_memcache = (CMemcache(self.permacache_memcaches, num_clients = num_mc_clients)
                          if self.permacache_memcaches
                          else None)
        self.permacache = CassandraCacheChain(localcache_cls(),
                                              CassandraCache('permacache',
                                                             self.cassandra,
                                                             read_consistency_level = self.cassandra_rcl,
                                                             write_consistency_level = self.cassandra_wcl),
                                              memcache = perma_memcache,
                                              lock_factory = self.make_lock)

        self.cache_chains.append(self.permacache)

        # hardcache is done after the db info is loaded, and then the
        # chains are reset to use the appropriate initial entries

        if self.stalecaches:
            self.cache = StaleCacheChain(localcache_cls(),
                                         CMemcache(self.stalecaches, num_clients=num_mc_clients),
                                         self.memcache)
        else:
            self.cache = MemcacheChain((localcache_cls(), self.memcache))
        self.cache_chains.append(self.cache)

        self.rendercache = MemcacheChain((localcache_cls(),
                                          CMemcache(self.rendercaches,
                                                    noreply=True, no_block=True,
                                                    num_clients = num_mc_clients)))
        self.cache_chains.append(self.rendercache)

        self.servicecache = MemcacheChain((localcache_cls(),
                                           CMemcache(self.servicecaches,
                                                     num_clients = num_mc_clients)))
        self.cache_chains.append(self.servicecache)

        self.thing_cache = CacheChain((localcache_cls(),))
        self.cache_chains.append(self.thing_cache)

        #load the database info
        self.dbm = self.load_db_params(global_conf)

        # can't do this until load_db_params() has been called
        self.hardcache = HardcacheChain((localcache_cls(),
                                         self.memcache,
                                         HardCache(self)),
                                        cache_negative_results = True)
        self.cache_chains.append(self.hardcache)

        # I know this sucks, but we need non-request-threads to be
        # able to reset the caches, so we need them be able to close
        # around 'cache_chains' without being able to call getattr on
        # 'g'
        cache_chains = self.cache_chains[::]
        def reset_caches():
            for chain in cache_chains:
                chain.reset()

        self.reset_caches = reset_caches
        self.reset_caches()

        #make a query cache
        self.stats_collector = QueryStats()

        # set the modwindow
        self.MODWINDOW = timedelta(self.MODWINDOW)

        self.REDDIT_MAIN = bool(os.environ.get('REDDIT_MAIN'))

        origin_prefix = self.domain_prefix + "." if self.domain_prefix else ""
        self.origin = "http://" + origin_prefix + self.domain
        self.secure_domains = set([urlparse(self.payment_domain).netloc])
        if self.https_endpoint:
            self.secure_domains.add(urlparse(self.https_endpoint).netloc)

        self.trusted_origins = [self.origin, self.https_endpoint] + ['http://' + origin_prefix + cname for cname in self.authorized_cnames]

        # load the unique hashed names of files under static
        static_files = os.path.join(self.paths.get('static_files'), 'static')
        names_file_path = os.path.join(static_files, 'names.json')
        if os.path.exists(names_file_path):
            with open(names_file_path) as handle:
                self.static_names = json.load(handle)
        else:
            self.static_names = {}

        #setup the logger
        self.log = logging.getLogger('reddit')
        self.log.addHandler(logging.StreamHandler())
        if self.debug:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.INFO)

        # set log level for pycountry which is chatty
        logging.getLogger('pycountry.db').setLevel(logging.CRITICAL)

        if not self.media_domain:
            self.media_domain = self.domain
        if self.media_domain == self.domain:
            print ("Warning: g.media_domain == g.domain. " +
                   "This may give untrusted content access to user cookies")

        self.profanities = None
        if self.profanity_wordlist and os.path.exists(self.profanity_wordlist):
            with open(self.profanity_wordlist, 'r') as handle:
                words = []
                for line in handle:
                    words.append(line.strip(' \n\r'))
                if words:
                    self.profanities = re.compile(r"\b(%s)\b" % '|'.join(words),
                                              re.I | re.U)

        self.reddit_host = socket.gethostname()
        self.reddit_pid  = os.getpid()

        for arg in sys.argv:
            tokens = arg.split("=")
            if len(tokens) == 2:
                k, v = tokens
                self.log.debug("Overriding g.%s to %s" % (k, v))
                setattr(self, k, v)

        #the shutdown toggle
        self.shutdown = False

        #if we're going to use the query_queue, we need amqp
        if self.write_query_queue and not self.amqp_host:
            raise Exception("amqp_host must be defined to use the query queue")

        # This requirement doesn't *have* to be a requirement, but there are
        # bugs at the moment that will pop up if you violate it
        if self.write_query_queue and not self.use_query_cache:
            raise Exception("write_query_queue requires use_query_cache")

        # try to set the source control revision number
        try:
            popen = subprocess.Popen(["git", "log", "--date=short",
                                      "--pretty=format:%H %h", '-n1'],
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE)
            resp, stderrdata = popen.communicate()
            resp = resp.strip().split(' ')
            self.version, self.short_version = resp
        except object, e:
            self.log.info("Couldn't read source revision (%r)" % e)
            self.version = self.short_version = '(unknown)'
コード例 #2
0
    def setup(self):
        self.env = ''
        if (
            # handle direct invocation of "nosetests"
            "test" in sys.argv[0] or
            # handle "setup.py test" and all permutations thereof.
            "setup.py" in sys.argv[0] and "test" in sys.argv[1:]
        ):
            self.env = "unit_test"

        self.queues = queues.declare_queues(self)

        self.extension_subdomains = dict(
            simple="mobile",
            i="compact",
            api="api",
            rss="rss",
            xml="xml",
            json="json",
        )

        ################# PROVIDERS
        self.auth_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.auth",
            self.authentication_provider,
        )
        self.media_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.media",
            self.media_provider,
        )
        self.cdn_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.cdn",
            self.cdn_provider,
        )
        self.ticket_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.support",
            # TODO: fix this later, it refuses to pick up 
            # g.config['ticket_provider'] value, so hardcoding for now.
            # really, the next uncommented line should be:
            #self.ticket_provider,
            # instead of:
            "zendesk",
        )
        self.image_resizing_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.image_resizing",
            self.image_resizing_provider,
        )
        self.email_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.email",
            self.email_provider,
        )
        self.startup_timer.intermediate("providers")

        ################# CONFIGURATION
        # AMQP is required
        if not self.amqp_host:
            raise ValueError("amqp_host not set in the .ini")

        if not self.cassandra_seeds:
            raise ValueError("cassandra_seeds not set in the .ini")

        # heavy load mode is read only mode with a different infobar
        if self.heavy_load_mode:
            self.read_only_mode = True

        origin_prefix = self.domain_prefix + "." if self.domain_prefix else ""
        self.origin = self.default_scheme + "://" + origin_prefix + self.domain

        self.trusted_domains = set([self.domain])
        if self.https_endpoint:
            https_url = urlparse(self.https_endpoint)
            self.trusted_domains.add(https_url.hostname)

        # load the unique hashed names of files under static
        static_files = os.path.join(self.paths.get('static_files'), 'static')
        names_file_path = os.path.join(static_files, 'names.json')
        if os.path.exists(names_file_path):
            with open(names_file_path) as handle:
                self.static_names = json.load(handle)
        else:
            self.static_names = {}

        # make python warnings go through the logging system
        logging.captureWarnings(capture=True)

        log = logging.getLogger('reddit')

        # when we're a script (paster run) just set up super simple logging
        if self.running_as_script:
            log.setLevel(logging.INFO)
            log.addHandler(logging.StreamHandler())

        # if in debug mode, override the logging level to DEBUG
        if self.debug:
            log.setLevel(logging.DEBUG)

        # attempt to figure out which pool we're in and add that to the
        # LogRecords.
        try:
            with open("/etc/ec2_asg", "r") as f:
                pool = f.read().strip()
            # clean up the pool name since we're putting stuff after "-"
            pool = pool.partition("-")[0]
        except IOError:
            pool = "reddit-app"
        self.log = logging.LoggerAdapter(log, {"pool": pool})

        # set locations
        locations = pkg_resources.resource_stream(__name__,
                                                  "../data/locations.json")
        self.locations = json.loads(locations.read())

        if not self.media_domain:
            self.media_domain = self.domain
        if self.media_domain == self.domain:
            print >> sys.stderr, ("Warning: g.media_domain == g.domain. " +
                   "This may give untrusted content access to user cookies")
        if self.oauth_domain == self.domain:
            print >> sys.stderr, ("Warning: g.oauth_domain == g.domain. "
                    "CORS requests to g.domain will be allowed")

        for arg in sys.argv:
            tokens = arg.split("=")
            if len(tokens) == 2:
                k, v = tokens
                self.log.debug("Overriding g.%s to %s" % (k, v))
                setattr(self, k, v)

        self.reddit_host = socket.gethostname()
        self.reddit_pid  = os.getpid()

        if hasattr(signal, 'SIGUSR1'):
            # not all platforms have user signals
            signal.signal(signal.SIGUSR1, thread_dump)

        locale.setlocale(locale.LC_ALL, self.locale)

        # Pre-calculate ratelimit values
        self.RL_RESET_SECONDS = self.config["RL_RESET_MINUTES"] * 60
        self.RL_MAX_REQS = int(self.config["RL_AVG_REQ_PER_SEC"] *
                                      self.RL_RESET_SECONDS)

        self.RL_OAUTH_RESET_SECONDS = self.config["RL_OAUTH_RESET_MINUTES"] * 60
        self.RL_OAUTH_MAX_REQS = int(self.config["RL_OAUTH_AVG_REQ_PER_SEC"] *
                                     self.RL_OAUTH_RESET_SECONDS)

        self.RL_LOGIN_MAX_REQS = int(self.config["RL_LOGIN_AVG_PER_SEC"] *
                                     self.RL_RESET_SECONDS)
        self.RL_LOGIN_IP_MAX_REQS = int(self.config["RL_LOGIN_IP_AVG_PER_SEC"] *
                                        self.RL_RESET_SECONDS)
        self.RL_SHARE_MAX_REQS = int(self.config["RL_SHARE_AVG_PER_SEC"] *
                                     self.RL_RESET_SECONDS)

        # Compile ratelimit regexs
        user_agent_ratelimit_regexes = {}
        for agent_re, limit in self.user_agent_ratelimit_regexes.iteritems():
            user_agent_ratelimit_regexes[re.compile(agent_re)] = limit
        self.user_agent_ratelimit_regexes = user_agent_ratelimit_regexes

        self.startup_timer.intermediate("configuration")

        ################# ZOOKEEPER
        zk_hosts = self.config["zookeeper_connection_string"]
        zk_username = self.config["zookeeper_username"]
        zk_password = self.config["zookeeper_password"]
        self.zookeeper = connect_to_zookeeper(zk_hosts, (zk_username,
                                                         zk_password))

        self.throttles = IPNetworkLiveList(
            self.zookeeper,
            root="/throttles",
            reduced_data_node="/throttles_reduced",
        )

        parser = ConfigParser.RawConfigParser()
        parser.optionxform = str
        parser.read([self.config["__file__"]])

        if self.config["liveconfig_source"] == "zookeeper":
            self.live_config = LiveConfig(self.zookeeper, LIVE_CONFIG_NODE)
        else:
            self.live_config = extract_live_config(parser, self.plugins)

        if self.config["secrets_source"] == "zookeeper":
            self.secrets = fetch_secrets(self.zookeeper)
        else:
            self.secrets = extract_secrets(parser)

        ################# PRIVILEGED USERS
        self.admins = PermissionFilteredEmployeeList(
            self.live_config, type="admin")
        self.sponsors = PermissionFilteredEmployeeList(
            self.live_config, type="sponsor")
        self.employees = PermissionFilteredEmployeeList(
            self.live_config, type="employee")

        # Store which OAuth clients employees may use, the keys are just for
        # readability.
        self.employee_approved_clients = \
            self.live_config["employee_approved_clients"].values()

        self.startup_timer.intermediate("zookeeper")

        ################# MEMCACHE
        num_mc_clients = self.num_mc_clients

        # a smaller pool of caches used only for distributed locks.
        self.lock_cache = CMemcache(
            "lock",
            self.lockcaches,
            num_clients=num_mc_clients,
        )
        self.make_lock = make_lock_factory(self.lock_cache, self.stats)

        # memcaches used in front of the permacache CF in cassandra.
        # XXX: this is a legacy thing; permacache was made when C* didn't have
        # a row cache.
        permacache_memcaches = CMemcache(
            "perma",
            self.permacache_memcaches,
            min_compress_len=1400,
            num_clients=num_mc_clients,
        )

        # the stalecache is a memcached local to the current app server used
        # for data that's frequently fetched but doesn't need to be fresh.
        if self.stalecaches:
            stalecaches = CMemcache(
                "stale",
                self.stalecaches,
                num_clients=num_mc_clients,
            )
        else:
            stalecaches = None

        self.startup_timer.intermediate("memcache")

        ################# MCROUTER
        self.mcrouter = Mcrouter(
            "mcrouter",
            self.mcrouter_addr,
            min_compress_len=1400,
            num_clients=num_mc_clients,
        )

        ################# THRIFT-BASED SERVICES
        activity_endpoint = self.config.get("activity_endpoint")
        if activity_endpoint:
            # make ActivityInfo objects rendercache-key friendly
            # TODO: figure out a more general solution for this if
            # we need to do this for other thrift-generated objects
            ActivityInfo.cache_key = lambda self, style: repr(self)

            activity_pool = ThriftConnectionPool(activity_endpoint, timeout=0.1)
            self.baseplate.add_to_context("activity_service",
                ThriftContextFactory(activity_pool, ActivityService.Client))

        self.startup_timer.intermediate("thrift")

        ################# CASSANDRA
        keyspace = "reddit"
        self.cassandra_pools = {
            "main":
                StatsCollectingConnectionPool(
                    keyspace,
                    stats=self.stats,
                    logging_name="main",
                    server_list=self.cassandra_seeds,
                    pool_size=self.cassandra_pool_size,
                    timeout=4,
                    max_retries=3,
                    prefill=False
                ),
        }

        permacache_cf = Permacache._setup_column_family(
            'permacache',
            self.cassandra_pools[self.cassandra_default_pool],
        )

        self.startup_timer.intermediate("cassandra")

        ################# POSTGRES
        self.dbm = self.load_db_params()
        self.startup_timer.intermediate("postgres")

        ################# CHAINS
        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components
        cache_chains = {}
        localcache_cls = (SelfEmptyingCache if self.running_as_script
                          else LocalCache)

        if stalecaches:
            self.gencache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                self.mcrouter,
            )
        else:
            self.gencache = CacheChain((localcache_cls(), self.mcrouter))
        cache_chains.update(gencache=self.gencache)

        if stalecaches:
            self.thingcache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                self.mcrouter,
            )
        else:
            self.thingcache = CacheChain((localcache_cls(), self.mcrouter))
        cache_chains.update(thingcache=self.thingcache)

        if stalecaches:
            self.memoizecache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                self.mcrouter,
            )
        else:
            self.memoizecache = MemcacheChain(
                (localcache_cls(), self.mcrouter))
        cache_chains.update(memoizecache=self.memoizecache)

        if stalecaches:
            self.srmembercache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                self.mcrouter,
            )
        else:
            self.srmembercache = MemcacheChain(
                (localcache_cls(), self.mcrouter))
        cache_chains.update(srmembercache=self.srmembercache)

        if stalecaches:
            self.relcache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                self.mcrouter,
            )
        else:
            self.relcache = MemcacheChain(
                (localcache_cls(), self.mcrouter))
        cache_chains.update(relcache=self.relcache)

        self.ratelimitcache = MemcacheChain(
                (localcache_cls(), self.mcrouter))
        cache_chains.update(ratelimitcache=self.ratelimitcache)

        # rendercache holds rendered partial templates.
        self.rendercache = MemcacheChain((
            localcache_cls(),
            self.mcrouter,
        ))
        cache_chains.update(rendercache=self.rendercache)

        # commentpanecaches hold fully rendered comment panes
        self.commentpanecache = MemcacheChain((
            localcache_cls(),
            self.mcrouter,
        ))
        cache_chains.update(commentpanecache=self.commentpanecache)

        # cassandra_local_cache is used for request-local caching in tdb_cassandra
        self.cassandra_local_cache = localcache_cls()
        cache_chains.update(cassandra_local_cache=self.cassandra_local_cache)

        if stalecaches:
            permacache_cache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                permacache_memcaches,
            )
        else:
            permacache_cache = CacheChain(
                (localcache_cls(), permacache_memcaches),
            )
        cache_chains.update(permacache=permacache_cache)

        self.permacache = Permacache(
            permacache_cache,
            permacache_cf,
            lock_factory=self.make_lock,
        )

        # hardcache is used for various things that tend to expire
        # TODO: replace hardcache w/ cassandra stuff
        self.hardcache = HardcacheChain(
            (localcache_cls(), HardCache(self)),
            cache_negative_results=True,
        )
        cache_chains.update(hardcache=self.hardcache)

        # I know this sucks, but we need non-request-threads to be
        # able to reset the caches, so we need them be able to close
        # around 'cache_chains' without being able to call getattr on
        # 'g'
        def reset_caches():
            for name, chain in cache_chains.iteritems():
                if isinstance(chain, TransitionalCache):
                    chain = chain.read_chain

                chain.reset()
                if isinstance(chain, LocalCache):
                    continue
                elif isinstance(chain, StaleCacheChain):
                    chain.stats = StaleCacheStats(self.stats, name)
                else:
                    chain.stats = CacheStats(self.stats, name)
        self.cache_chains = cache_chains

        self.reset_caches = reset_caches
        self.reset_caches()

        self.startup_timer.intermediate("cache_chains")

        # try to set the source control revision numbers
        self.versions = {}
        r2_root = os.path.dirname(os.path.dirname(self.paths["root"]))
        r2_gitdir = os.path.join(r2_root, ".git")
        self.short_version = self.record_repo_version("r2", r2_gitdir)

        if I18N_PATH:
            i18n_git_path = os.path.join(os.path.dirname(I18N_PATH), ".git")
            self.record_repo_version("i18n", i18n_git_path)

        # Initialize the amqp module globals, start the worker, etc.
        r2.lib.amqp.initialize(self)

        self.events = EventQueue()

        self.startup_timer.intermediate("revisions")
コード例 #3
0
ファイル: app_globals.py プロジェクト: Arinzeokeke/reddit
    def setup(self):
        self.env = ''
        if (
            # handle direct invocation of "nosetests"
            "test" in sys.argv[0] or
            # handle "setup.py test" and all permutations thereof.
            "setup.py" in sys.argv[0] and "test" in sys.argv[1:]
        ):
            self.env = "unit_test"

        self.queues = queues.declare_queues(self)

        self.extension_subdomains = dict(
            simple="mobile",
            i="compact",
            api="api",
            rss="rss",
            xml="xml",
            json="json",
        )

        ################# PROVIDERS
        self.auth_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.auth",
            self.authentication_provider,
        )
        self.media_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.media",
            self.media_provider,
        )
        self.cdn_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.cdn",
            self.cdn_provider,
        )
        self.ticket_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.support",
            # TODO: fix this later, it refuses to pick up 
            # g.config['ticket_provider'] value, so hardcoding for now.
            # really, the next uncommented line should be:
            #self.ticket_provider,
            # instead of:
            "zendesk",
        )
        self.image_resizing_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.image_resizing",
            self.image_resizing_provider,
        )
        self.email_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.email",
            self.email_provider,
        )
        self.startup_timer.intermediate("providers")

        ################# CONFIGURATION
        # AMQP is required
        if not self.amqp_host:
            raise ValueError("amqp_host not set in the .ini")

        if not self.cassandra_seeds:
            raise ValueError("cassandra_seeds not set in the .ini")

        # heavy load mode is read only mode with a different infobar
        if self.heavy_load_mode:
            self.read_only_mode = True

        origin_prefix = self.domain_prefix + "." if self.domain_prefix else ""
        self.origin = self.default_scheme + "://" + origin_prefix + self.domain

        self.trusted_domains = set([self.domain])
        if self.https_endpoint:
            https_url = urlparse(self.https_endpoint)
            self.trusted_domains.add(https_url.hostname)

        # load the unique hashed names of files under static
        static_files = os.path.join(self.paths.get('static_files'), 'static')
        names_file_path = os.path.join(static_files, 'names.json')
        if os.path.exists(names_file_path):
            with open(names_file_path) as handle:
                self.static_names = json.load(handle)
        else:
            self.static_names = {}

        # make python warnings go through the logging system
        logging.captureWarnings(capture=True)

        log = logging.getLogger('reddit')

        # when we're a script (paster run) just set up super simple logging
        if self.running_as_script:
            log.setLevel(logging.INFO)
            log.addHandler(logging.StreamHandler())

        # if in debug mode, override the logging level to DEBUG
        if self.debug:
            log.setLevel(logging.DEBUG)

        # attempt to figure out which pool we're in and add that to the
        # LogRecords.
        try:
            with open("/etc/ec2_asg", "r") as f:
                pool = f.read().strip()
            # clean up the pool name since we're putting stuff after "-"
            pool = pool.partition("-")[0]
        except IOError:
            pool = "reddit-app"
        self.log = logging.LoggerAdapter(log, {"pool": pool})

        # set locations
        locations = pkg_resources.resource_stream(__name__,
                                                  "../data/locations.json")
        self.locations = json.loads(locations.read())

        if not self.media_domain:
            self.media_domain = self.domain
        if self.media_domain == self.domain:
            print >> sys.stderr, ("Warning: g.media_domain == g.domain. " +
                   "This may give untrusted content access to user cookies")
        if self.oauth_domain == self.domain:
            print >> sys.stderr, ("Warning: g.oauth_domain == g.domain. "
                    "CORS requests to g.domain will be allowed")

        for arg in sys.argv:
            tokens = arg.split("=")
            if len(tokens) == 2:
                k, v = tokens
                self.log.debug("Overriding g.%s to %s" % (k, v))
                setattr(self, k, v)

        self.reddit_host = socket.gethostname()
        self.reddit_pid  = os.getpid()

        if hasattr(signal, 'SIGUSR1'):
            # not all platforms have user signals
            signal.signal(signal.SIGUSR1, thread_dump)

        locale.setlocale(locale.LC_ALL, self.locale)

        # Pre-calculate ratelimit values
        self.RL_RESET_SECONDS = self.config["RL_RESET_MINUTES"] * 60
        self.RL_MAX_REQS = int(self.config["RL_AVG_REQ_PER_SEC"] *
                                      self.RL_RESET_SECONDS)

        self.RL_OAUTH_RESET_SECONDS = self.config["RL_OAUTH_RESET_MINUTES"] * 60
        self.RL_OAUTH_MAX_REQS = int(self.config["RL_OAUTH_AVG_REQ_PER_SEC"] *
                                     self.RL_OAUTH_RESET_SECONDS)

        self.RL_LOGIN_MAX_REQS = int(self.config["RL_LOGIN_AVG_PER_SEC"] *
                                     self.RL_RESET_SECONDS)
        self.RL_LOGIN_IP_MAX_REQS = int(self.config["RL_LOGIN_IP_AVG_PER_SEC"] *
                                        self.RL_RESET_SECONDS)
        self.RL_SHARE_MAX_REQS = int(self.config["RL_SHARE_AVG_PER_SEC"] *
                                     self.RL_RESET_SECONDS)

        # Compile ratelimit regexs
        user_agent_ratelimit_regexes = {}
        for agent_re, limit in self.user_agent_ratelimit_regexes.iteritems():
            user_agent_ratelimit_regexes[re.compile(agent_re)] = limit
        self.user_agent_ratelimit_regexes = user_agent_ratelimit_regexes

        self.startup_timer.intermediate("configuration")

        ################# ZOOKEEPER
        # for now, zookeeper will be an optional part of the stack.
        # if it's not configured, we will grab the expected config from the
        # [live_config] section of the ini file
        zk_hosts = self.config.get("zookeeper_connection_string")
        if zk_hosts:
            from r2.lib.zookeeper import (connect_to_zookeeper,
                                          LiveConfig, LiveList)
            zk_username = self.config["zookeeper_username"]
            zk_password = self.config["zookeeper_password"]
            self.zookeeper = connect_to_zookeeper(zk_hosts, (zk_username,
                                                             zk_password))
            self.live_config = LiveConfig(self.zookeeper, LIVE_CONFIG_NODE)
            self.secrets = fetch_secrets(self.zookeeper)
            self.throttles = LiveList(self.zookeeper, "/throttles",
                                      map_fn=ipaddress.ip_network,
                                      reduce_fn=ipaddress.collapse_addresses)

            # close our zk connection when the app shuts down
            SHUTDOWN_CALLBACKS.append(self.zookeeper.stop)
        else:
            self.zookeeper = None
            parser = ConfigParser.RawConfigParser()
            parser.optionxform = str
            parser.read([self.config["__file__"]])
            self.live_config = extract_live_config(parser, self.plugins)
            self.secrets = extract_secrets(parser)
            self.throttles = tuple()  # immutable since it's not real

        ################# PRIVILEGED USERS
        self.admins = PermissionFilteredEmployeeList(
            self.live_config, type="admin")
        self.sponsors = PermissionFilteredEmployeeList(
            self.live_config, type="sponsor")
        self.employees = PermissionFilteredEmployeeList(
            self.live_config, type="employee")

        # Store which OAuth clients employees may use, the keys are just for
        # readability.
        self.employee_approved_clients = \
            self.live_config["employee_approved_clients"].values()

        self.startup_timer.intermediate("zookeeper")

        ################# MEMCACHE
        num_mc_clients = self.num_mc_clients

        # the main memcache pool. used for most everything.
        memcaches = CMemcache(
            "main",
            self.memcaches,
            min_compress_len=1400,
            num_clients=num_mc_clients,
            validators=[validate_size_error],
        )

        # a pool just used for @memoize results
        memoizecaches = CMemcache(
            "memoize",
            self.memoizecaches,
            min_compress_len=50 * 1024,
            num_clients=num_mc_clients,
            validators=[validate_size_error],
        )

        # a pool just for srmember rels
        srmembercaches = CMemcache(
            "srmember",
            self.srmembercaches,
            min_compress_len=96,
            num_clients=num_mc_clients,
            validators=[validate_size_error],
        )

        # a pool just for rels
        relcaches = CMemcache(
            "rel",
            self.relcaches,
            min_compress_len=96,
            num_clients=num_mc_clients,
            validators=[validate_size_error],
        )

        ratelimitcaches = CMemcache(
            "ratelimit",
            self.ratelimitcaches,
            min_compress_len=96,
            num_clients=num_mc_clients,
            validators=[validate_size_error],
        )

        # a smaller pool of caches used only for distributed locks.
        self.lock_cache = CMemcache(
            "lock",
            self.lockcaches,
            num_clients=num_mc_clients,
            validators=[validate_size_error],
        )
        self.make_lock = make_lock_factory(self.lock_cache, self.stats)

        # memcaches used in front of the permacache CF in cassandra.
        # XXX: this is a legacy thing; permacache was made when C* didn't have
        # a row cache.
        permacache_memcaches = CMemcache("perma",
                                         self.permacache_memcaches,
                                         min_compress_len=1400,
                                         num_clients=num_mc_clients,
                                         validators=[],)

        # the stalecache is a memcached local to the current app server used
        # for data that's frequently fetched but doesn't need to be fresh.
        if self.stalecaches:
            stalecaches = CMemcache(
                "stale",
                self.stalecaches,
                num_clients=num_mc_clients,
                validators=[validate_size_error],
            )
        else:
            stalecaches = None

        # hardcache memcache pool
        hardcache_memcaches = CMemcache(
            "hardcache",
            self.hardcache_memcaches,
            binary=True,
            min_compress_len=1400,
            num_clients=num_mc_clients,
            validators=[validate_size_error],
        )

        self.startup_timer.intermediate("memcache")

        ################# MCROUTER
        self.mcrouter = Mcrouter(
            "mcrouter",
            self.mcrouter_addr,
            min_compress_len=1400,
            num_clients=1,
        )

        ################# THRIFT-BASED SERVICES
        activity_endpoint = self.config.get("activity_endpoint")
        if activity_endpoint:
            # make ActivityInfo objects rendercache-key friendly
            # TODO: figure out a more general solution for this if
            # we need to do this for other thrift-generated objects
            ActivityInfo.cache_key = lambda self, style: repr(self)

            activity_pool = ThriftConnectionPool(activity_endpoint, timeout=0.1)
            self.baseplate.add_to_context("activity_service",
                ThriftContextFactory(activity_pool, ActivityService.Client))

        self.startup_timer.intermediate("thrift")

        ################# CASSANDRA
        keyspace = "reddit"
        self.cassandra_pools = {
            "main":
                StatsCollectingConnectionPool(
                    keyspace,
                    stats=self.stats,
                    logging_name="main",
                    server_list=self.cassandra_seeds,
                    pool_size=self.cassandra_pool_size,
                    timeout=4,
                    max_retries=3,
                    prefill=False
                ),
        }

        permacache_cf = Permacache._setup_column_family(
            'permacache',
            self.cassandra_pools[self.cassandra_default_pool],
        )

        self.startup_timer.intermediate("cassandra")

        ################# POSTGRES
        self.dbm = self.load_db_params()
        self.startup_timer.intermediate("postgres")

        ################# CHAINS
        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components
        cache_chains = {}
        localcache_cls = (SelfEmptyingCache if self.running_as_script
                          else LocalCache)

        if stalecaches:
            self.cache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                memcaches,
            )
        else:
            self.cache = CacheChain((localcache_cls(), memcaches))
        cache_chains.update(cache=self.cache)

        if stalecaches:
            self.thingcache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                self.mcrouter,
            )
        else:
            self.thingcache = CacheChain((localcache_cls(), self.mcrouter))
        cache_chains.update(thingcache=self.thingcache)

        if stalecaches:
            self.memoizecache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                memoizecaches,
            )
        else:
            self.memoizecache = MemcacheChain(
                (localcache_cls(), memoizecaches))
        cache_chains.update(memoizecache=self.memoizecache)

        if stalecaches:
            self.srmembercache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                srmembercaches,
            )
        else:
            self.srmembercache = MemcacheChain(
                (localcache_cls(), srmembercaches))
        cache_chains.update(srmembercache=self.srmembercache)

        if stalecaches:
            self.relcache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                relcaches,
            )
        else:
            self.relcache = MemcacheChain(
                (localcache_cls(), relcaches))
        cache_chains.update(relcache=self.relcache)

        self.ratelimitcache = MemcacheChain(
                (localcache_cls(), ratelimitcaches))
        cache_chains.update(ratelimitcache=self.ratelimitcache)

        # rendercache holds rendered partial templates.
        self.rendercache = MemcacheChain((
            localcache_cls(),
            self.mcrouter,
        ))
        cache_chains.update(rendercache=self.rendercache)

        # pagecaches hold fully rendered pages (includes comment panes)
        self.pagecache = MemcacheChain((
            localcache_cls(),
            self.mcrouter,
        ))
        cache_chains.update(pagecache=self.pagecache)

        # cassandra_local_cache is used for request-local caching in tdb_cassandra
        self.cassandra_local_cache = localcache_cls()
        cache_chains.update(cassandra_local_cache=self.cassandra_local_cache)

        if stalecaches:
            permacache_cache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                permacache_memcaches,
            )
        else:
            permacache_cache = CacheChain(
                (localcache_cls(), permacache_memcaches),
            )
        cache_chains.update(permacache=permacache_cache)

        self.permacache = Permacache(
            permacache_cache,
            permacache_cf,
            lock_factory=self.make_lock,
        )

        # hardcache is used for various things that tend to expire
        # TODO: replace hardcache w/ cassandra stuff
        self.hardcache = HardcacheChain(
            (localcache_cls(), hardcache_memcaches, HardCache(self)),
            cache_negative_results=True,
        )
        cache_chains.update(hardcache=self.hardcache)

        # I know this sucks, but we need non-request-threads to be
        # able to reset the caches, so we need them be able to close
        # around 'cache_chains' without being able to call getattr on
        # 'g'
        def reset_caches():
            for name, chain in cache_chains.iteritems():
                if isinstance(chain, TransitionalCache):
                    chain = chain.read_chain

                chain.reset()
                if isinstance(chain, LocalCache):
                    continue
                elif isinstance(chain, StaleCacheChain):
                    chain.stats = StaleCacheStats(self.stats, name)
                else:
                    chain.stats = CacheStats(self.stats, name)
        self.cache_chains = cache_chains

        self.reset_caches = reset_caches
        self.reset_caches()

        self.startup_timer.intermediate("cache_chains")

        # try to set the source control revision numbers
        self.versions = {}
        r2_root = os.path.dirname(os.path.dirname(self.paths["root"]))
        r2_gitdir = os.path.join(r2_root, ".git")
        self.short_version = self.record_repo_version("r2", r2_gitdir)

        if I18N_PATH:
            i18n_git_path = os.path.join(os.path.dirname(I18N_PATH), ".git")
            self.record_repo_version("i18n", i18n_git_path)

        # Initialize the amqp module globals, start the worker, etc.
        r2.lib.amqp.initialize(self)

        self.events = EventQueue()

        self.startup_timer.intermediate("revisions")
コード例 #4
0
ファイル: app_globals.py プロジェクト: Agrajagd/reddit
    def setup(self):
        self.queues = queues.declare_queues(self)

        ################# PROVIDERS
        self.media_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.media",
            self.media_provider,
        )
        self.startup_timer.intermediate("providers")

        ################# CONFIGURATION
        # AMQP is required
        if not self.amqp_host:
            raise ValueError("amqp_host not set in the .ini")

        if not self.cassandra_seeds:
            raise ValueError("cassandra_seeds not set in the .ini")

        # heavy load mode is read only mode with a different infobar
        if self.heavy_load_mode:
            self.read_only_mode = True

        origin_prefix = self.domain_prefix + "." if self.domain_prefix else ""
        self.origin = "http://" + origin_prefix + self.domain

        self.trusted_domains = set([self.domain])
        if self.https_endpoint:
            https_url = urlparse(self.https_endpoint)
            self.trusted_domains.add(https_url.hostname)

        # load the unique hashed names of files under static
        static_files = os.path.join(self.paths.get('static_files'), 'static')
        names_file_path = os.path.join(static_files, 'names.json')
        if os.path.exists(names_file_path):
            with open(names_file_path) as handle:
                self.static_names = json.load(handle)
        else:
            self.static_names = {}

        # make python warnings go through the logging system
        logging.captureWarnings(capture=True)

        log = logging.getLogger('reddit')

        # when we're a script (paster run) just set up super simple logging
        if self.running_as_script:
            log.setLevel(logging.INFO)
            log.addHandler(logging.StreamHandler())

        # if in debug mode, override the logging level to DEBUG
        if self.debug:
            log.setLevel(logging.DEBUG)

        # attempt to figure out which pool we're in and add that to the
        # LogRecords.
        try:
            with open("/etc/ec2_asg", "r") as f:
                pool = f.read().strip()
            # clean up the pool name since we're putting stuff after "-"
            pool = pool.partition("-")[0]
        except IOError:
            pool = "reddit-app"
        self.log = logging.LoggerAdapter(log, {"pool": pool})

        # set locations
        self.locations = {}

        if not self.media_domain:
            self.media_domain = self.domain
        if self.media_domain == self.domain:
            print >> sys.stderr, ("Warning: g.media_domain == g.domain. " +
                   "This may give untrusted content access to user cookies")

        for arg in sys.argv:
            tokens = arg.split("=")
            if len(tokens) == 2:
                k, v = tokens
                self.log.debug("Overriding g.%s to %s" % (k, v))
                setattr(self, k, v)

        self.reddit_host = socket.gethostname()
        self.reddit_pid  = os.getpid()

        if hasattr(signal, 'SIGUSR1'):
            # not all platforms have user signals
            signal.signal(signal.SIGUSR1, thread_dump)

        locale.setlocale(locale.LC_ALL, self.locale)

        # Pre-calculate ratelimit values
        self.RL_RESET_SECONDS = self.config["RL_RESET_MINUTES"] * 60
        self.RL_MAX_REQS = int(self.config["RL_AVG_REQ_PER_SEC"] *
                                      self.RL_RESET_SECONDS)

        self.RL_OAUTH_RESET_SECONDS = self.config["RL_OAUTH_RESET_MINUTES"] * 60
        self.RL_OAUTH_MAX_REQS = int(self.config["RL_OAUTH_AVG_REQ_PER_SEC"] *
                                     self.RL_OAUTH_RESET_SECONDS)

        self.startup_timer.intermediate("configuration")

        ################# ZOOKEEPER
        # for now, zookeeper will be an optional part of the stack.
        # if it's not configured, we will grab the expected config from the
        # [live_config] section of the ini file
        zk_hosts = self.config.get("zookeeper_connection_string")
        if zk_hosts:
            from r2.lib.zookeeper import (connect_to_zookeeper,
                                          LiveConfig, LiveList)
            zk_username = self.config["zookeeper_username"]
            zk_password = self.config["zookeeper_password"]
            self.zookeeper = connect_to_zookeeper(zk_hosts, (zk_username,
                                                             zk_password))
            self.live_config = LiveConfig(self.zookeeper, LIVE_CONFIG_NODE)
            self.secrets = fetch_secrets(self.zookeeper)
            self.throttles = LiveList(self.zookeeper, "/throttles",
                                      map_fn=ipaddress.ip_network,
                                      reduce_fn=ipaddress.collapse_addresses)
        else:
            self.zookeeper = None
            parser = ConfigParser.RawConfigParser()
            parser.optionxform = str
            parser.read([self.config["__file__"]])
            self.live_config = extract_live_config(parser, self.plugins)
            self.secrets = extract_secrets(parser)
            self.throttles = tuple()  # immutable since it's not real

        self.startup_timer.intermediate("zookeeper")

        ################# MEMCACHE
        num_mc_clients = self.num_mc_clients

        # the main memcache pool. used for most everything.
        self.memcache = CMemcache(
            self.memcaches,
            min_compress_len=50 * 1024,
            num_clients=num_mc_clients,
        )

        # a pool just used for @memoize results
        memoizecaches = CMemcache(
            self.memoizecaches,
            min_compress_len=50 * 1024,
            num_clients=num_mc_clients,
        )

        # a pool just for srmember rels
        srmembercaches = CMemcache(
            self.srmembercaches,
            min_compress_len=96,
            num_clients=num_mc_clients,
        )

        ratelimitcaches = CMemcache(
            self.ratelimitcaches,
            min_compress_len=96,
            num_clients=num_mc_clients,
        )

        # a smaller pool of caches used only for distributed locks.
        # TODO: move this to ZooKeeper
        self.lock_cache = CMemcache(self.lockcaches,
                                    num_clients=num_mc_clients)
        self.make_lock = make_lock_factory(self.lock_cache, self.stats)

        # memcaches used in front of the permacache CF in cassandra.
        # XXX: this is a legacy thing; permacache was made when C* didn't have
        # a row cache.
        if self.permacache_memcaches:
            permacache_memcaches = CMemcache(self.permacache_memcaches,
                                             min_compress_len=50 * 1024,
                                             num_clients=num_mc_clients)
        else:
            permacache_memcaches = None

        # the stalecache is a memcached local to the current app server used
        # for data that's frequently fetched but doesn't need to be fresh.
        if self.stalecaches:
            stalecaches = CMemcache(self.stalecaches,
                                    num_clients=num_mc_clients)
        else:
            stalecaches = None

        # rendercache holds rendered partial templates.
        rendercaches = CMemcache(
            self.rendercaches,
            noreply=True,
            no_block=True,
            num_clients=num_mc_clients,
            min_compress_len=480,
        )

        # pagecaches hold fully rendered pages
        pagecaches = CMemcache(
            self.pagecaches,
            noreply=True,
            no_block=True,
            num_clients=num_mc_clients,
            min_compress_len=1400,
        )

        self.startup_timer.intermediate("memcache")

        ################# CASSANDRA
        keyspace = "reddit"
        self.cassandra_pools = {
            "main":
                StatsCollectingConnectionPool(
                    keyspace,
                    stats=self.stats,
                    logging_name="main",
                    server_list=self.cassandra_seeds,
                    pool_size=self.cassandra_pool_size,
                    timeout=4,
                    max_retries=3,
                    prefill=False
                ),
        }

        permacache_cf = CassandraCache(
            'permacache',
            self.cassandra_pools[self.cassandra_default_pool],
            read_consistency_level=self.cassandra_rcl,
            write_consistency_level=self.cassandra_wcl
        )

        self.startup_timer.intermediate("cassandra")

        ################# POSTGRES
        self.dbm = self.load_db_params()
        self.startup_timer.intermediate("postgres")

        ################# CHAINS
        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components
        cache_chains = {}
        localcache_cls = (SelfEmptyingCache if self.running_as_script
                          else LocalCache)

        if stalecaches:
            self.cache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                self.memcache,
            )
        else:
            self.cache = MemcacheChain((localcache_cls(), self.memcache))
        cache_chains.update(cache=self.cache)

        if stalecaches:
            self.memoizecache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                memoizecaches,
            )
        else:
            self.memoizecache = MemcacheChain(
                (localcache_cls(), memoizecaches))
        cache_chains.update(memoizecache=self.memoizecache)

        if stalecaches:
            self.srmembercache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                srmembercaches,
            )
        else:
            self.srmembercache = MemcacheChain(
                (localcache_cls(), srmembercaches))
        cache_chains.update(srmembercache=self.srmembercache)

        self.ratelimitcache = MemcacheChain(
                (localcache_cls(), ratelimitcaches))
        cache_chains.update(ratelimitcache=self.ratelimitcache)

        self.rendercache = MemcacheChain((
            localcache_cls(),
            rendercaches,
        ))
        cache_chains.update(rendercache=self.rendercache)

        self.pagecache = MemcacheChain((
            localcache_cls(),
            pagecaches,
        ))
        cache_chains.update(pagecache=self.pagecache)

        # the thing_cache is used in tdb_cassandra.
        self.thing_cache = CacheChain((localcache_cls(),))
        cache_chains.update(thing_cache=self.thing_cache)

        self.permacache = CassandraCacheChain(
            localcache_cls(),
            permacache_cf,
            memcache=permacache_memcaches,
            lock_factory=self.make_lock,
        )
        cache_chains.update(permacache=self.permacache)

        # hardcache is used for various things that tend to expire
        # TODO: replace hardcache w/ cassandra stuff
        self.hardcache = HardcacheChain(
            (localcache_cls(), self.memcache, HardCache(self)),
            cache_negative_results=True,
        )
        cache_chains.update(hardcache=self.hardcache)

        # I know this sucks, but we need non-request-threads to be
        # able to reset the caches, so we need them be able to close
        # around 'cache_chains' without being able to call getattr on
        # 'g'
        def reset_caches():
            for name, chain in cache_chains.iteritems():
                chain.reset()
                chain.stats = CacheStats(self.stats, name)
        self.cache_chains = cache_chains

        self.reset_caches = reset_caches
        self.reset_caches()

        self.startup_timer.intermediate("cache_chains")

        # try to set the source control revision numbers
        self.versions = {}
        r2_root = os.path.dirname(os.path.dirname(self.paths["root"]))
        r2_gitdir = os.path.join(r2_root, ".git")
        self.short_version = self.record_repo_version("r2", r2_gitdir)

        if I18N_PATH:
            i18n_git_path = os.path.join(os.path.dirname(I18N_PATH), ".git")
            self.record_repo_version("i18n", i18n_git_path)

        self.startup_timer.intermediate("revisions")
コード例 #5
0
    def __init__(self, global_conf, app_conf, paths, **extra):
        """
        Globals acts as a container for objects available throughout
        the life of the application.

        One instance of Globals is created by Pylons during
        application initialization and is available during requests
        via the 'g' variable.

        ``global_conf``
            The same variable used throughout ``config/middleware.py``
            namely, the variables from the ``[DEFAULT]`` section of the
            configuration file.

        ``app_conf``
            The same ``kw`` dictionary used throughout
            ``config/middleware.py`` namely, the variables from the
            section in the config file for your application.

        ``extra``
            The configuration returned from ``load_config`` in 
            ``config/middleware.py`` which may be of use in the setup of
            your global variables.

        """

        # slop over all variables to start with
        for k, v in global_conf.iteritems():
            if not k.startswith("_") and not hasattr(self, k):
                if k in self.int_props:
                    v = int(v)
                elif k in self.float_props:
                    v = float(v)
                elif k in self.bool_props:
                    v = self.to_bool(v)
                elif k in self.tuple_props:
                    v = tuple(self.to_iter(v))
                setattr(self, k, v)

        self.running_as_script = global_conf.get('running_as_script', False)

        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components

        localcache_cls = (SelfEmptyingCache
                          if self.running_as_script else LocalCache)
        num_mc_clients = self.num_mc_clients

        self.cache_chains = []

        self.permacache = self.init_cass_cache('permacache',
                                               self.permacache_memcaches,
                                               self.cassandra_seeds)

        self.urlcache = self.init_cass_cache('urls', self.url_caches,
                                             self.url_seeds)

        # hardcache is done after the db info is loaded, and then the
        # chains are reset to use the appropriate initial entries

        self.cache = self.init_memcached(self.memcaches)
        self.memcache = self.cache.caches[1]  # used by lock.py

        self.rendercache = self.init_memcached(self.rendercaches,
                                               noreply=True,
                                               no_block=True)

        self.servicecache = self.init_memcached(self.servicecaches)

        self.make_lock = make_lock_factory(self.memcache)

        # set default time zone if one is not set
        tz = global_conf.get('timezone')
        dtz = global_conf.get('display_timezone', tz)

        self.tz = pytz.timezone(tz)
        self.display_tz = pytz.timezone(dtz)

        #load the database info
        self.dbm = self.load_db_params(global_conf)

        # can't do this until load_db_params() has been called
        self.hardcache = HardcacheChain(
            (localcache_cls(), self.memcache, HardCache(self)),
            cache_negative_results=True)
        self.cache_chains.append(self.hardcache)

        # I know this sucks, but we need non-request-threads to be
        # able to reset the caches, so we need them be able to close
        # around 'cache_chains' without being able to call getattr on
        # 'g'
        cache_chains = self.cache_chains[::]

        def reset_caches():
            for chain in cache_chains:
                chain.reset()

        self.reset_caches = reset_caches
        self.reset_caches()

        #make a query cache
        self.stats_collector = QueryStats()

        # set the modwindow
        self.MODWINDOW = timedelta(self.MODWINDOW)

        self.REDDIT_MAIN = bool(os.environ.get('REDDIT_MAIN'))

        # turn on for language support
        self.languages, self.lang_name = \
                        get_active_langs(default_lang= self.lang)

        all_languages = self.lang_name.keys()
        all_languages.sort()
        self.all_languages = all_languages

        self.paths = paths

        # load the md5 hashes of files under static
        static_files = os.path.join(paths.get('static_files'), 'static')
        self.static_md5 = {}
        if os.path.exists(static_files):
            for f in os.listdir(static_files):
                if f.endswith('.md5'):
                    key = f.strip('.md5')
                    f = os.path.join(static_files, f)
                    with open(f, 'r') as handle:
                        md5 = handle.read().strip('\n')
                    self.static_md5[key] = md5

        #set up the logging directory
        log_path = self.log_path
        process_iden = global_conf.get('scgi_port', 'default')
        self.reddit_port = process_iden
        if log_path:
            if not os.path.exists(log_path):
                os.makedirs(log_path)
            for fname in os.listdir(log_path):
                if fname.startswith(process_iden):
                    full_name = os.path.join(log_path, fname)
                    os.remove(full_name)

        #setup the logger
        self.log = logging.getLogger('reddit')
        self.log.addHandler(logging.StreamHandler())
        if self.debug:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.WARNING)

        # set log level for pycountry which is chatty
        logging.getLogger('pycountry.db').setLevel(logging.CRITICAL)

        if not self.media_domain:
            self.media_domain = self.domain
        if self.media_domain == self.domain:
            print("Warning: g.media_domain == g.domain. " +
                  "This may give untrusted content access to user cookies")

        #read in our CSS so that it can become a default for subreddit
        #stylesheets
        stylesheet_path = os.path.join(paths.get('static_files'),
                                       self.static_path.lstrip('/'),
                                       self.stylesheet)
        with open(stylesheet_path) as s:
            self.default_stylesheet = s.read()

        self.profanities = None
        if self.profanity_wordlist and os.path.exists(self.profanity_wordlist):
            with open(self.profanity_wordlist, 'r') as handle:
                words = []
                for line in handle:
                    words.append(line.strip(' \n\r'))
                if words:
                    self.profanities = re.compile(
                        r"\b(%s)\b" % '|'.join(words), re.I | re.U)

        self.reddit_host = socket.gethostname()
        self.reddit_pid = os.getpid()

        #the shutdown toggle
        self.shutdown = False

        #if we're going to use the query_queue, we need amqp
        if self.write_query_queue and not self.amqp_host:
            raise Exception("amqp_host must be defined to use the query queue")

        # try to set the source control revision number
        try:
            popen = subprocess.Popen(
                ["git", "log", "--date=short", "--pretty=format:%H %h", '-n1'],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE)
            resp, stderrdata = popen.communicate()
            resp = resp.strip().split(' ')
            self.version, self.short_version = resp
        except object, e:
            self.log.info("Couldn't read source revision (%r)" % e)
            self.version = self.short_version = '(unknown)'
コード例 #6
0
ファイル: app_globals.py プロジェクト: debanshuk/reddit
    def setup(self):
        self.queues = queues.declare_queues(self)

        ################# CONFIGURATION
        # AMQP is required
        if not self.amqp_host:
            raise ValueError("amqp_host not set in the .ini")

        # This requirement doesn't *have* to be a requirement, but there are
        # bugs at the moment that will pop up if you violate it
        # XXX: get rid of these options. new query cache is always on.
        if self.write_query_queue and not self.use_query_cache:
            raise Exception("write_query_queue requires use_query_cache")

        if not self.cassandra_seeds:
            raise ValueError("cassandra_seeds not set in the .ini")

        # heavy load mode is read only mode with a different infobar
        if self.heavy_load_mode:
            self.read_only_mode = True

        origin_prefix = self.domain_prefix + "." if self.domain_prefix else ""
        self.origin = "http://" + origin_prefix + self.domain
        self.secure_domains = set([urlparse(self.payment_domain).netloc])

        self.trusted_domains = set([self.domain])
        self.trusted_domains.update(self.authorized_cnames)
        if self.https_endpoint:
            https_url = urlparse(self.https_endpoint)
            self.secure_domains.add(https_url.netloc)
            self.trusted_domains.add(https_url.hostname)
        if getattr(self, 'oauth_domain', None):
            self.secure_domains.add(self.oauth_domain)

        # load the unique hashed names of files under static
        static_files = os.path.join(self.paths.get('static_files'), 'static')
        names_file_path = os.path.join(static_files, 'names.json')
        if os.path.exists(names_file_path):
            with open(names_file_path) as handle:
                self.static_names = json.load(handle)
        else:
            self.static_names = {}

        #setup the logger
        self.log = logging.getLogger('reddit')
        self.log.addHandler(logging.StreamHandler())
        if self.debug:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.INFO)

        # set log level for pycountry which is chatty
        logging.getLogger('pycountry.db').setLevel(logging.CRITICAL)

        if not self.media_domain:
            self.media_domain = self.domain
        if self.media_domain == self.domain:
            print ("Warning: g.media_domain == g.domain. " +
                   "This may give untrusted content access to user cookies")

        for arg in sys.argv:
            tokens = arg.split("=")
            if len(tokens) == 2:
                k, v = tokens
                self.log.debug("Overriding g.%s to %s" % (k, v))
                setattr(self, k, v)

        self.reddit_host = socket.gethostname()
        self.reddit_pid  = os.getpid()

        if hasattr(signal, 'SIGUSR1'):
            # not all platforms have user signals
            signal.signal(signal.SIGUSR1, thread_dump)

        self.startup_timer.intermediate("configuration")

        ################# ZOOKEEPER
        # for now, zookeeper will be an optional part of the stack.
        # if it's not configured, we will grab the expected config from the
        # [live_config] section of the ini file
        zk_hosts = self.config.get("zookeeper_connection_string")
        if zk_hosts:
            from r2.lib.zookeeper import (connect_to_zookeeper,
                                          LiveConfig, LiveList)
            zk_username = self.config["zookeeper_username"]
            zk_password = self.config["zookeeper_password"]
            self.zookeeper = connect_to_zookeeper(zk_hosts, (zk_username,
                                                             zk_password))
            self.live_config = LiveConfig(self.zookeeper, LIVE_CONFIG_NODE)
            self.throttles = LiveList(self.zookeeper, "/throttles",
                                      map_fn=ipaddress.ip_network,
                                      reduce_fn=ipaddress.collapse_addresses)
        else:
            self.zookeeper = None
            parser = ConfigParser.RawConfigParser()
            parser.read([self.config["__file__"]])
            self.live_config = extract_live_config(parser, self.plugins)
            self.throttles = tuple()  # immutable since it's not real
        self.startup_timer.intermediate("zookeeper")

        ################# MEMCACHE
        num_mc_clients = self.num_mc_clients

        # the main memcache pool. used for most everything.
        self.memcache = CMemcache(self.memcaches, num_clients=num_mc_clients)

        # a smaller pool of caches used only for distributed locks.
        # TODO: move this to ZooKeeper
        self.lock_cache = CMemcache(self.lockcaches,
                                    num_clients=num_mc_clients)
        self.make_lock = make_lock_factory(self.lock_cache, self.stats)

        # memcaches used in front of the permacache CF in cassandra.
        # XXX: this is a legacy thing; permacache was made when C* didn't have
        # a row cache.
        if self.permacache_memcaches:
            permacache_memcaches = CMemcache(self.permacache_memcaches,
                                             num_clients=num_mc_clients)
        else:
            permacache_memcaches = None

        # the stalecache is a memcached local to the current app server used
        # for data that's frequently fetched but doesn't need to be fresh.
        if self.stalecaches:
            stalecaches = CMemcache(self.stalecaches,
                                    num_clients=num_mc_clients)
        else:
            stalecaches = None

        # rendercache holds rendered partial templates.
        rendercaches = CMemcache(
            self.rendercaches,
            noreply=True,
            no_block=True,
            num_clients=num_mc_clients,
        )

        # pagecaches hold fully rendered pages
        pagecaches = CMemcache(
            self.pagecaches,
            noreply=True,
            no_block=True,
            num_clients=num_mc_clients,
        )

        self.startup_timer.intermediate("memcache")

        ################# CASSANDRA
        keyspace = "reddit"
        self.cassandra_pools = {
            "main":
                StatsCollectingConnectionPool(
                    keyspace,
                    stats=self.stats,
                    logging_name="main",
                    server_list=self.cassandra_seeds,
                    pool_size=self.cassandra_pool_size,
                    timeout=4,
                    max_retries=3,
                    prefill=False
                ),
        }

        permacache_cf = CassandraCache(
            'permacache',
            self.cassandra_pools[self.cassandra_default_pool],
            read_consistency_level=self.cassandra_rcl,
            write_consistency_level=self.cassandra_wcl
        )

        self.startup_timer.intermediate("cassandra")

        ################# POSTGRES
        event.listens_for(engine.Engine, 'before_cursor_execute')(
            self.stats.pg_before_cursor_execute)
        event.listens_for(engine.Engine, 'after_cursor_execute')(
            self.stats.pg_after_cursor_execute)

        self.dbm = self.load_db_params()
        self.startup_timer.intermediate("postgres")

        ################# CHAINS
        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components
        self.cache_chains = {}
        localcache_cls = (SelfEmptyingCache if self.running_as_script
                          else LocalCache)

        if stalecaches:
            self.cache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                self.memcache,
            )
        else:
            self.cache = MemcacheChain((localcache_cls(), self.memcache))
        self.cache_chains.update(cache=self.cache)

        self.rendercache = MemcacheChain((
            localcache_cls(),
            rendercaches,
        ))
        self.cache_chains.update(rendercache=self.rendercache)

        self.pagecache = MemcacheChain((
            localcache_cls(),
            pagecaches,
        ))
        self.cache_chains.update(pagecache=self.pagecache)

        # the thing_cache is used in tdb_cassandra.
        self.thing_cache = CacheChain((localcache_cls(),))
        self.cache_chains.update(thing_cache=self.thing_cache)

        self.permacache = CassandraCacheChain(
            localcache_cls(),
            permacache_cf,
            memcache=permacache_memcaches,
            lock_factory=self.make_lock,
        )
        self.cache_chains.update(permacache=self.permacache)

        # hardcache is used for various things that tend to expire
        # TODO: replace hardcache w/ cassandra stuff
        self.hardcache = HardcacheChain(
            (localcache_cls(), self.memcache, HardCache(self)),
            cache_negative_results=True,
        )
        self.cache_chains.update(hardcache=self.hardcache)

        # I know this sucks, but we need non-request-threads to be
        # able to reset the caches, so we need them be able to close
        # around 'cache_chains' without being able to call getattr on
        # 'g'
        cache_chains = self.cache_chains.copy()
        def reset_caches():
            for name, chain in cache_chains.iteritems():
                chain.reset()
                chain.stats = CacheStats(self.stats, name)

        self.reset_caches = reset_caches
        self.reset_caches()

        self.startup_timer.intermediate("cache_chains")

        # try to set the source control revision numbers
        self.versions = {}
        r2_root = os.path.dirname(os.path.dirname(self.paths["root"]))
        r2_gitdir = os.path.join(r2_root, ".git")
        self.short_version = self.record_repo_version("r2", r2_gitdir)

        if I18N_PATH:
            i18n_git_path = os.path.join(os.path.dirname(I18N_PATH), ".git")
            self.record_repo_version("i18n", i18n_git_path)

        self.startup_timer.intermediate("revisions")
コード例 #7
0
ファイル: app_globals.py プロジェクト: pookleblinky/lesswrong
    def __init__(self, global_conf, app_conf, paths, **extra):
        """
        Globals acts as a container for objects available throughout
        the life of the application.

        One instance of Globals is created by Pylons during
        application initialization and is available during requests
        via the 'g' variable.
        
        ``global_conf``
            The same variable used throughout ``config/middleware.py``
            namely, the variables from the ``[DEFAULT]`` section of the
            configuration file.
            
        ``app_conf``
            The same ``kw`` dictionary used throughout
            ``config/middleware.py`` namely, the variables from the
            section in the config file for your application.
            
        ``extra``
            The configuration returned from ``load_config`` in 
            ``config/middleware.py`` which may be of use in the setup of
            your global variables.
            
        """

        def to_bool(x):
            return (x.lower() == 'true') if x else None
        
        def to_iter(name, delim = ','):
            return (x.strip() for x in global_conf.get(name, '').split(delim))


        # slop over all variables to start with
        for k, v in  global_conf.iteritems():
            if not k.startswith("_") and not hasattr(self, k):
                if k in self.int_props:
                    v = int(v)
                elif k in self.bool_props:
                    v = to_bool(v)
                elif k in self.tuple_props:
                    v = tuple(to_iter(k))
                setattr(self, k, v)

        # initialize caches
        mc = Memcache(self.memcaches)
        self.cache = CacheChain((LocalCache(), mc))
        self.permacache = Memcache(self.permacaches)
        self.rendercache = Memcache(self.rendercaches)
        self.make_lock = make_lock_factory(mc)

        self.rec_cache = Memcache(self.rec_cache)
        
        # set default time zone if one is not set
        self.tz = pytz.timezone(global_conf.get('timezone'))

        #make a query cache
        self.stats_collector = QueryStats()

        # set the modwindow
        self.MODWINDOW = timedelta(self.MODWINDOW)

        self.REDDIT_MAIN = bool(os.environ.get('REDDIT_MAIN'))

        # turn on for language support
        self.languages, self.lang_name = _get_languages()

        all_languages = self.lang_name.keys()
        all_languages.sort()
        self.all_languages = all_languages

        # load the md5 hashes of files under static
        static_files = os.path.join(paths.get('static_files'), 'static')
        self.static_md5 = {}
        if os.path.exists(static_files):
            for f in os.listdir(static_files):
                if f.endswith('.md5'):
                    key = f[0:-4]
                    f = os.path.join(static_files, f)
                    with open(f, 'r') as handle:
                        md5 = handle.read().strip('\n')
                    self.static_md5[key] = md5


        #set up the logging directory
        log_path = self.log_path
        process_iden = global_conf.get('scgi_port', 'default')
        if log_path:
            if not os.path.exists(log_path):
                os.makedirs(log_path)
            for fname in os.listdir(log_path):
                if fname.startswith(process_iden):
                    full_name = os.path.join(log_path, fname)
                    os.remove(full_name)

        #setup the logger
        self.log = logging.getLogger('reddit')
        self.log.addHandler(logging.StreamHandler())
        if self.debug:
            self.log.setLevel(logging.DEBUG)

        #read in our CSS so that it can become a default for subreddit
        #stylesheets
        stylesheet_path = os.path.join(paths.get('static_files'),
                                       self.static_path.lstrip('/'),
                                       self.stylesheet)
        with open(stylesheet_path) as s:
            self.default_stylesheet = s.read()

        self.reddit_host = socket.gethostname()
        self.reddit_pid  = os.getpid()
コード例 #8
0
ファイル: app_globals.py プロジェクト: 99plus2/reddit
    def setup(self):
        self.queues = queues.declare_queues(self)

        ################# CONFIGURATION
        # AMQP is required
        if not self.amqp_host:
            raise ValueError("amqp_host not set in the .ini")

        if not self.cassandra_seeds:
            raise ValueError("cassandra_seeds not set in the .ini")

        # heavy load mode is read only mode with a different infobar
        if self.heavy_load_mode:
            self.read_only_mode = True

        origin_prefix = self.domain_prefix + "." if self.domain_prefix else ""
        self.origin = "http://" + origin_prefix + self.domain
        self.secure_domains = set([urlparse(self.payment_domain).netloc])

        self.trusted_domains = set([self.domain])
        self.trusted_domains.update(self.authorized_cnames)
        if self.https_endpoint:
            https_url = urlparse(self.https_endpoint)
            self.secure_domains.add(https_url.netloc)
            self.trusted_domains.add(https_url.hostname)
        if getattr(self, "oauth_domain", None):
            self.secure_domains.add(self.oauth_domain)

        # load the unique hashed names of files under static
        static_files = os.path.join(self.paths.get("static_files"), "static")
        names_file_path = os.path.join(static_files, "names.json")
        if os.path.exists(names_file_path):
            with open(names_file_path) as handle:
                self.static_names = json.load(handle)
        else:
            self.static_names = {}

        # if we're a web app running on old uwsgi, force load the logging
        # config from the file since uwsgi didn't do it for us
        if not self.running_as_script and self.old_uwsgi_load_logging_config:
            logging.config.fileConfig(self.config["__file__"])

        # make python warnings go through the logging system
        logging.captureWarnings(capture=True)

        log = logging.getLogger("reddit")

        # when we're a script (paster run) just set up super simple logging
        if self.running_as_script:
            log.setLevel(logging.INFO)
            log.addHandler(logging.StreamHandler())

        # if in debug mode, override the logging level to DEBUG
        if self.debug:
            log.setLevel(logging.DEBUG)

        # attempt to figure out which pool we're in and add that to the
        # LogRecords.
        try:
            with open("/etc/ec2_asg", "r") as f:
                pool = f.read().strip()
            # clean up the pool name since we're putting stuff after "-"
            pool = pool.partition("-")[0]
        except IOError:
            pool = "reddit-app"
        self.log = logging.LoggerAdapter(log, {"pool": pool})

        # make cssutils use the real logging system
        csslog = logging.getLogger("cssutils")
        cssutils.log.setLog(csslog)

        # load the country list
        countries_file_path = os.path.join(static_files, "countries.json")
        try:
            with open(countries_file_path) as handle:
                self.countries = json.load(handle)
            self.log.debug("Using countries.json.")
        except IOError:
            self.log.warning("Couldn't find countries.json. Using pycountry.")
            self.countries = get_countries_and_codes()

        if not self.media_domain:
            self.media_domain = self.domain
        if self.media_domain == self.domain:
            print("Warning: g.media_domain == g.domain. " + "This may give untrusted content access to user cookies")

        for arg in sys.argv:
            tokens = arg.split("=")
            if len(tokens) == 2:
                k, v = tokens
                self.log.debug("Overriding g.%s to %s" % (k, v))
                setattr(self, k, v)

        self.reddit_host = socket.gethostname()
        self.reddit_pid = os.getpid()

        if hasattr(signal, "SIGUSR1"):
            # not all platforms have user signals
            signal.signal(signal.SIGUSR1, thread_dump)

        self.startup_timer.intermediate("configuration")

        ################# ZOOKEEPER
        # for now, zookeeper will be an optional part of the stack.
        # if it's not configured, we will grab the expected config from the
        # [live_config] section of the ini file
        zk_hosts = self.config.get("zookeeper_connection_string")
        if zk_hosts:
            from r2.lib.zookeeper import connect_to_zookeeper, LiveConfig, LiveList

            zk_username = self.config["zookeeper_username"]
            zk_password = self.config["zookeeper_password"]
            self.zookeeper = connect_to_zookeeper(zk_hosts, (zk_username, zk_password))
            self.live_config = LiveConfig(self.zookeeper, LIVE_CONFIG_NODE)
            self.throttles = LiveList(
                self.zookeeper, "/throttles", map_fn=ipaddress.ip_network, reduce_fn=ipaddress.collapse_addresses
            )
            self.banned_domains = LiveDict(self.zookeeper, "/banned-domains", watch=True)
        else:
            self.zookeeper = None
            parser = ConfigParser.RawConfigParser()
            parser.read([self.config["__file__"]])
            self.live_config = extract_live_config(parser, self.plugins)
            self.throttles = tuple()  # immutable since it's not real
            self.banned_domains = dict()
        self.startup_timer.intermediate("zookeeper")

        ################# MEMCACHE
        num_mc_clients = self.num_mc_clients

        # the main memcache pool. used for most everything.
        self.memcache = CMemcache(self.memcaches, num_clients=num_mc_clients)

        # a smaller pool of caches used only for distributed locks.
        # TODO: move this to ZooKeeper
        self.lock_cache = CMemcache(self.lockcaches, num_clients=num_mc_clients)
        self.make_lock = make_lock_factory(self.lock_cache, self.stats)

        # memcaches used in front of the permacache CF in cassandra.
        # XXX: this is a legacy thing; permacache was made when C* didn't have
        # a row cache.
        if self.permacache_memcaches:
            permacache_memcaches = CMemcache(self.permacache_memcaches, num_clients=num_mc_clients)
        else:
            permacache_memcaches = None

        # the stalecache is a memcached local to the current app server used
        # for data that's frequently fetched but doesn't need to be fresh.
        if self.stalecaches:
            stalecaches = CMemcache(self.stalecaches, num_clients=num_mc_clients)
        else:
            stalecaches = None

        # rendercache holds rendered partial templates.
        rendercaches = CMemcache(self.rendercaches, noreply=True, no_block=True, num_clients=num_mc_clients)

        # pagecaches hold fully rendered pages
        pagecaches = CMemcache(self.pagecaches, noreply=True, no_block=True, num_clients=num_mc_clients)

        self.startup_timer.intermediate("memcache")

        ################# CASSANDRA
        keyspace = "reddit"
        self.cassandra_pools = {
            "main": StatsCollectingConnectionPool(
                keyspace,
                stats=self.stats,
                logging_name="main",
                server_list=self.cassandra_seeds,
                pool_size=self.cassandra_pool_size,
                timeout=4,
                max_retries=3,
                prefill=False,
            )
        }

        permacache_cf = CassandraCache(
            "permacache",
            self.cassandra_pools[self.cassandra_default_pool],
            read_consistency_level=self.cassandra_rcl,
            write_consistency_level=self.cassandra_wcl,
        )

        self.startup_timer.intermediate("cassandra")

        ################# POSTGRES
        event.listens_for(engine.Engine, "before_cursor_execute")(self.stats.pg_before_cursor_execute)
        event.listens_for(engine.Engine, "after_cursor_execute")(self.stats.pg_after_cursor_execute)

        self.dbm = self.load_db_params()
        self.startup_timer.intermediate("postgres")

        ################# CHAINS
        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components
        self.cache_chains = {}
        localcache_cls = SelfEmptyingCache if self.running_as_script else LocalCache

        if stalecaches:
            self.cache = StaleCacheChain(localcache_cls(), stalecaches, self.memcache)
        else:
            self.cache = MemcacheChain((localcache_cls(), self.memcache))
        self.cache_chains.update(cache=self.cache)

        self.rendercache = MemcacheChain((localcache_cls(), rendercaches))
        self.cache_chains.update(rendercache=self.rendercache)

        self.pagecache = MemcacheChain((localcache_cls(), pagecaches))
        self.cache_chains.update(pagecache=self.pagecache)

        # the thing_cache is used in tdb_cassandra.
        self.thing_cache = CacheChain((localcache_cls(),))
        self.cache_chains.update(thing_cache=self.thing_cache)

        self.permacache = CassandraCacheChain(
            localcache_cls(), permacache_cf, memcache=permacache_memcaches, lock_factory=self.make_lock
        )
        self.cache_chains.update(permacache=self.permacache)

        # hardcache is used for various things that tend to expire
        # TODO: replace hardcache w/ cassandra stuff
        self.hardcache = HardcacheChain((localcache_cls(), self.memcache, HardCache(self)), cache_negative_results=True)
        self.cache_chains.update(hardcache=self.hardcache)

        # I know this sucks, but we need non-request-threads to be
        # able to reset the caches, so we need them be able to close
        # around 'cache_chains' without being able to call getattr on
        # 'g'
        cache_chains = self.cache_chains.copy()

        def reset_caches():
            for name, chain in cache_chains.iteritems():
                chain.reset()
                chain.stats = CacheStats(self.stats, name)

        self.reset_caches = reset_caches
        self.reset_caches()

        self.startup_timer.intermediate("cache_chains")

        # try to set the source control revision numbers
        self.versions = {}
        r2_root = os.path.dirname(os.path.dirname(self.paths["root"]))
        r2_gitdir = os.path.join(r2_root, ".git")
        self.short_version = self.record_repo_version("r2", r2_gitdir)

        if I18N_PATH:
            i18n_git_path = os.path.join(os.path.dirname(I18N_PATH), ".git")
            self.record_repo_version("i18n", i18n_git_path)

        self.startup_timer.intermediate("revisions")
コード例 #9
0
    def setup(self):
        # heavy load mode is read only mode with a different infobar
        if self.heavy_load_mode:
            self.read_only_mode = True

        if hasattr(signal, 'SIGUSR1'):
            # not all platforms have user signals
            signal.signal(signal.SIGUSR1, thread_dump)

        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components

        localcache_cls = (SelfEmptyingCache
                          if self.running_as_script else LocalCache)
        num_mc_clients = self.num_mc_clients

        self.cache_chains = {}

        # for now, zookeeper will be an optional part of the stack.
        # if it's not configured, we will grab the expected config from the
        # [live_config] section of the ini file
        zk_hosts = self.config.get("zookeeper_connection_string")
        if zk_hosts:
            from r2.lib.zookeeper import (connect_to_zookeeper, LiveConfig,
                                          LiveList)
            zk_username = self.config["zookeeper_username"]
            zk_password = self.config["zookeeper_password"]
            self.zookeeper = connect_to_zookeeper(zk_hosts,
                                                  (zk_username, zk_password))
            self.live_config = LiveConfig(self.zookeeper, LIVE_CONFIG_NODE)
            self.throttles = LiveList(self.zookeeper,
                                      "/throttles",
                                      map_fn=ipaddress.ip_network,
                                      reduce_fn=ipaddress.collapse_addresses)
        else:
            self.zookeeper = None
            parser = ConfigParser.RawConfigParser()
            parser.read([self.config["__file__"]])
            self.live_config = extract_live_config(parser, self.plugins)
            self.throttles = tuple()  # immutable since it's not real

        self.memcache = CMemcache(self.memcaches, num_clients=num_mc_clients)
        self.lock_cache = CMemcache(self.lockcaches,
                                    num_clients=num_mc_clients)

        self.stats = Stats(self.config.get('statsd_addr'),
                           self.config.get('statsd_sample_rate'))

        event.listens_for(engine.Engine, 'before_cursor_execute')(
            self.stats.pg_before_cursor_execute)
        event.listens_for(engine.Engine, 'after_cursor_execute')(
            self.stats.pg_after_cursor_execute)

        self.make_lock = make_lock_factory(self.lock_cache, self.stats)

        if not self.cassandra_seeds:
            raise ValueError("cassandra_seeds not set in the .ini")

        keyspace = "reddit"
        self.cassandra_pools = {
            "main":
            StatsCollectingConnectionPool(keyspace,
                                          stats=self.stats,
                                          logging_name="main",
                                          server_list=self.cassandra_seeds,
                                          pool_size=self.cassandra_pool_size,
                                          timeout=2,
                                          max_retries=3,
                                          prefill=False),
        }

        perma_memcache = (CMemcache(self.permacache_memcaches,
                                    num_clients=num_mc_clients)
                          if self.permacache_memcaches else None)
        self.permacache = CassandraCacheChain(
            localcache_cls(),
            CassandraCache('permacache',
                           self.cassandra_pools[self.cassandra_default_pool],
                           read_consistency_level=self.cassandra_rcl,
                           write_consistency_level=self.cassandra_wcl),
            memcache=perma_memcache,
            lock_factory=self.make_lock)

        self.cache_chains.update(permacache=self.permacache)

        # hardcache is done after the db info is loaded, and then the
        # chains are reset to use the appropriate initial entries

        if self.stalecaches:
            self.cache = StaleCacheChain(
                localcache_cls(),
                CMemcache(self.stalecaches, num_clients=num_mc_clients),
                self.memcache)
        else:
            self.cache = MemcacheChain((localcache_cls(), self.memcache))
        self.cache_chains.update(cache=self.cache)

        self.rendercache = MemcacheChain(
            (localcache_cls(),
             CMemcache(self.rendercaches,
                       noreply=True,
                       no_block=True,
                       num_clients=num_mc_clients)))
        self.cache_chains.update(rendercache=self.rendercache)

        self.thing_cache = CacheChain((localcache_cls(), ))
        self.cache_chains.update(thing_cache=self.thing_cache)

        #load the database info
        self.dbm = self.load_db_params()

        # can't do this until load_db_params() has been called
        self.hardcache = HardcacheChain(
            (localcache_cls(), self.memcache, HardCache(self)),
            cache_negative_results=True)
        self.cache_chains.update(hardcache=self.hardcache)

        # I know this sucks, but we need non-request-threads to be
        # able to reset the caches, so we need them be able to close
        # around 'cache_chains' without being able to call getattr on
        # 'g'
        cache_chains = self.cache_chains.copy()

        def reset_caches():
            for name, chain in cache_chains.iteritems():
                chain.reset()
                chain.stats = CacheStats(self.stats, name)

        self.reset_caches = reset_caches
        self.reset_caches()

        # set the modwindow
        self.MODWINDOW = timedelta(self.MODWINDOW)

        self.REDDIT_MAIN = bool(os.environ.get('REDDIT_MAIN'))

        origin_prefix = self.domain_prefix + "." if self.domain_prefix else ""
        self.origin = "http://" + origin_prefix + self.domain
        self.secure_domains = set([urlparse(self.payment_domain).netloc])

        self.trusted_domains = set([self.domain])
        self.trusted_domains.update(self.authorized_cnames)
        if self.https_endpoint:
            https_url = urlparse(self.https_endpoint)
            self.secure_domains.add(https_url.netloc)
            self.trusted_domains.add(https_url.hostname)
        if getattr(self, 'oauth_domain', None):
            self.secure_domains.add(self.oauth_domain)

        # load the unique hashed names of files under static
        static_files = os.path.join(self.paths.get('static_files'), 'static')
        names_file_path = os.path.join(static_files, 'names.json')
        if os.path.exists(names_file_path):
            with open(names_file_path) as handle:
                self.static_names = json.load(handle)
        else:
            self.static_names = {}

        #setup the logger
        self.log = logging.getLogger('reddit')
        self.log.addHandler(logging.StreamHandler())
        if self.debug:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.INFO)

        # set log level for pycountry which is chatty
        logging.getLogger('pycountry.db').setLevel(logging.CRITICAL)

        if not self.media_domain:
            self.media_domain = self.domain
        if self.media_domain == self.domain:
            print("Warning: g.media_domain == g.domain. " +
                  "This may give untrusted content access to user cookies")

        self.reddit_host = socket.gethostname()
        self.reddit_pid = os.getpid()

        for arg in sys.argv:
            tokens = arg.split("=")
            if len(tokens) == 2:
                k, v = tokens
                self.log.debug("Overriding g.%s to %s" % (k, v))
                setattr(self, k, v)

        #if we're going to use the query_queue, we need amqp
        if self.write_query_queue and not self.amqp_host:
            raise Exception("amqp_host must be defined to use the query queue")

        # This requirement doesn't *have* to be a requirement, but there are
        # bugs at the moment that will pop up if you violate it
        if self.write_query_queue and not self.use_query_cache:
            raise Exception("write_query_queue requires use_query_cache")

        # try to set the source control revision numbers
        self.versions = {}
        r2_root = os.path.dirname(os.path.dirname(self.paths["root"]))
        r2_gitdir = os.path.join(r2_root, ".git")
        self.short_version = self.record_repo_version("r2", r2_gitdir)

        if I18N_PATH:
            i18n_git_path = os.path.join(os.path.dirname(I18N_PATH), ".git")
            self.record_repo_version("i18n", i18n_git_path)

        if self.log_start:
            self.log.error("reddit app %s:%s started %s at %s" %
                           (self.reddit_host, self.reddit_pid,
                            self.short_version, datetime.now()))
コード例 #10
0
ファイル: app_globals.py プロジェクト: redhotpenguin/reddit
    def __init__(self, global_conf, app_conf, paths, **extra):
        """
        Globals acts as a container for objects available throughout
        the life of the application.

        One instance of Globals is created by Pylons during
        application initialization and is available during requests
        via the 'g' variable.

        ``global_conf``
            The same variable used throughout ``config/middleware.py``
            namely, the variables from the ``[DEFAULT]`` section of the
            configuration file.

        ``app_conf``
            The same ``kw`` dictionary used throughout
            ``config/middleware.py`` namely, the variables from the
            section in the config file for your application.

        ``extra``
            The configuration returned from ``load_config`` in 
            ``config/middleware.py`` which may be of use in the setup of
            your global variables.

        """

        # slop over all variables to start with
        for k, v in  global_conf.iteritems():
            if not k.startswith("_") and not hasattr(self, k):
                if k in self.int_props:
                    v = int(v)
                elif k in self.float_props:
                    v = float(v)
                elif k in self.bool_props:
                    v = self.to_bool(v)
                elif k in self.tuple_props:
                    v = tuple(self.to_iter(v))
                elif k in self.choice_props:
                    if v not in self.choice_props[k]:
                        raise ValueError("Unknown option for %r: %r not in %r"
                                         % (k, v, self.choice_props[k]))
                    v = self.choice_props[k][v]
                setattr(self, k, v)

        self.running_as_script = global_conf.get('running_as_script', False)

        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components

        localcache_cls = (SelfEmptyingCache if self.running_as_script
                          else LocalCache)
        num_mc_clients = self.num_mc_clients

        self.cache_chains = []

        self.memcache = CMemcache(self.memcaches, num_clients = num_mc_clients)
        self.make_lock = make_lock_factory(self.memcache)

        if not self.cassandra_seeds:
            raise ValueError("cassandra_seeds not set in the .ini")
        if not self.url_seeds:
            raise ValueError("url_seeds not set in the .ini")
        self.cassandra_seeds = list(self.cassandra_seeds)
        random.shuffle(self.cassandra_seeds)
        self.cassandra = pycassa.connect_thread_local(self.cassandra_seeds)
        perma_memcache = (CMemcache(self.permacache_memcaches, num_clients = num_mc_clients)
                          if self.permacache_memcaches
                          else None)
        self.permacache = self.init_cass_cache('permacache', 'permacache',
                                               self.cassandra,
                                               self.make_lock,
                                               memcache = perma_memcache,
                                               read_consistency_level = self.cassandra_rcl,
                                               write_consistency_level = self.cassandra_wcl,
                                               localcache_cls = localcache_cls)
        self.cache_chains.append(self.permacache)

        self.url_seeds = list(self.url_seeds)
        random.shuffle(self.url_seeds)
        self.url_cassandra = pycassa.connect_thread_local(self.url_seeds)
        self.urlcache = self.init_cass_cache('urls', 'urls',
                                             self.url_cassandra,
                                             self.make_lock,
                                             # until we've merged this
                                             # with the regular
                                             # cluster, this will
                                             # always be CL_ONE
                                             read_consistency_level = CL_ONE,
                                             write_consistency_level = CL_ONE,
                                             localcache_cls = localcache_cls)
        self.cache_chains.append(self.urlcache)

        # hardcache is done after the db info is loaded, and then the
        # chains are reset to use the appropriate initial entries

        self.cache = MemcacheChain((localcache_cls(), self.memcache))
        self.cache_chains.append(self.cache)

        self.rendercache = MemcacheChain((localcache_cls(),
                                          CMemcache(self.rendercaches,
                                                    noreply=True, no_block=True,
                                                    num_clients = num_mc_clients)))
        self.cache_chains.append(self.rendercache)

        self.servicecache = MemcacheChain((localcache_cls(),
                                           CMemcache(self.servicecaches,
                                                     num_clients = num_mc_clients)))
        self.cache_chains.append(self.servicecache)

        self.thing_cache = CacheChain((localcache_cls(),))
        self.cache_chains.append(self.thing_cache)

        # set default time zone if one is not set
        tz = global_conf.get('timezone')
        dtz = global_conf.get('display_timezone', tz)

        self.tz = pytz.timezone(tz)
        self.display_tz = pytz.timezone(dtz)

        #load the database info
        self.dbm = self.load_db_params(global_conf)

        # can't do this until load_db_params() has been called
        self.hardcache = HardcacheChain((localcache_cls(),
                                         self.memcache,
                                         HardCache(self)),
                                        cache_negative_results = True)
        self.cache_chains.append(self.hardcache)

        # I know this sucks, but we need non-request-threads to be
        # able to reset the caches, so we need them be able to close
        # around 'cache_chains' without being able to call getattr on
        # 'g'
        cache_chains = self.cache_chains[::]
        def reset_caches():
            for chain in cache_chains:
                chain.reset()

        self.reset_caches = reset_caches
        self.reset_caches()

        #make a query cache
        self.stats_collector = QueryStats()

        # set the modwindow
        self.MODWINDOW = timedelta(self.MODWINDOW)

        self.REDDIT_MAIN = bool(os.environ.get('REDDIT_MAIN'))

        # turn on for language support
        self.languages, self.lang_name = \
                        get_active_langs(default_lang= self.lang)

        all_languages = self.lang_name.keys()
        all_languages.sort()
        self.all_languages = all_languages

        self.paths = paths

        # load the md5 hashes of files under static
        static_files = os.path.join(paths.get('static_files'), 'static')
        self.static_md5 = {}
        if os.path.exists(static_files):
            for f in os.listdir(static_files):
                if f.endswith('.md5'):
                    key = f[0:-4]
                    f = os.path.join(static_files, f)
                    with open(f, 'r') as handle:
                        md5 = handle.read().strip('\n')
                    self.static_md5[key] = md5


        #set up the logging directory
        log_path = self.log_path
        process_iden = global_conf.get('scgi_port', 'default')
        self.reddit_port = process_iden
        if log_path:
            if not os.path.exists(log_path):
                os.makedirs(log_path)
            for fname in os.listdir(log_path):
                if fname.startswith(process_iden):
                    full_name = os.path.join(log_path, fname)
                    os.remove(full_name)

        #setup the logger
        self.log = logging.getLogger('reddit')
        self.log.addHandler(logging.StreamHandler())
        if self.debug:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.INFO)

        # set log level for pycountry which is chatty
        logging.getLogger('pycountry.db').setLevel(logging.CRITICAL)

        if not self.media_domain:
            self.media_domain = self.domain
        #if self.media_domain == self.domain:
            #print ("Warning: g.media_domain == g.domain. " +
            #       "This may give untrusted content access to user cookies")

        #read in our CSS so that it can become a default for subreddit
        #stylesheets
        stylesheet_path = os.path.join(paths.get('static_files'),
                                       self.static_path.lstrip('/'),
                                       self.stylesheet)
        with open(stylesheet_path) as s:
            self.default_stylesheet = s.read()

        self.profanities = None
        if self.profanity_wordlist and os.path.exists(self.profanity_wordlist):
            with open(self.profanity_wordlist, 'r') as handle:
                words = []
                for line in handle:
                    words.append(line.strip(' \n\r'))
                if words:
                    self.profanities = re.compile(r"\b(%s)\b" % '|'.join(words),
                                              re.I | re.U)

        self.reddit_host = socket.gethostname()
        self.reddit_pid  = os.getpid()

        #the shutdown toggle
        self.shutdown = False

        #if we're going to use the query_queue, we need amqp
        if self.write_query_queue and not self.amqp_host:
            raise Exception("amqp_host must be defined to use the query queue")

        # This requirement doesn't *have* to be a requirement, but there are
        # bugs at the moment that will pop up if you violate it
        if self.write_query_queue and not self.use_query_cache:
            raise Exception("write_query_queue requires use_query_cache")

        # try to set the source control revision number
        try:
            popen = subprocess.Popen(["git", "log", "--date=short",
                                      "--pretty=format:%H %h", '-n1'],
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE)
            resp, stderrdata = popen.communicate()
            resp = resp.strip().split(' ')
            self.version, self.short_version = resp
        except object, e:
            self.log.info("Couldn't read source revision (%r)" % e)
            self.version = self.short_version = '(unknown)'
コード例 #11
0
ファイル: app_globals.py プロジェクト: DamonAnderson/reddit
    def setup(self, global_conf):
        # heavy load mode is read only mode with a different infobar
        if self.heavy_load_mode:
            self.read_only_mode = True

        if hasattr(signal, 'SIGUSR1'):
            # not all platforms have user signals
            signal.signal(signal.SIGUSR1, thread_dump)

        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components

        localcache_cls = (SelfEmptyingCache if self.running_as_script
                          else LocalCache)
        num_mc_clients = self.num_mc_clients

        self.cache_chains = {}

        self.memcache = CMemcache(self.memcaches, num_clients = num_mc_clients)
        self.make_lock = make_lock_factory(self.memcache)

        self.stats = Stats(global_conf.get('statsd_addr'),
                           global_conf.get('statsd_sample_rate'))

        event.listens_for(engine.Engine, 'before_cursor_execute')(
            self.stats.pg_before_cursor_execute)
        event.listens_for(engine.Engine, 'after_cursor_execute')(
            self.stats.pg_after_cursor_execute)

        if not self.cassandra_seeds:
            raise ValueError("cassandra_seeds not set in the .ini")


        keyspace = "reddit"
        self.cassandra_pools = {
            "main":
                StatsCollectingConnectionPool(
                    keyspace,
                    stats=self.stats,
                    logging_name="main",
                    server_list=self.cassandra_seeds,
                    pool_size=self.cassandra_pool_size,
                    timeout=2,
                    max_retries=3,
                    prefill=False
                ),
            "noretries":
                StatsCollectingConnectionPool(
                    keyspace,
                    stats=self.stats,
                    logging_name="noretries",
                    server_list=self.cassandra_seeds,
                    pool_size=len(self.cassandra_seeds),
                    timeout=2,
                    max_retries=0,
                    prefill=False
                ),
        }

        perma_memcache = (CMemcache(self.permacache_memcaches, num_clients = num_mc_clients)
                          if self.permacache_memcaches
                          else None)
        self.permacache = CassandraCacheChain(localcache_cls(),
                                              CassandraCache('permacache',
                                                             self.cassandra_pools[self.cassandra_default_pool],
                                                             read_consistency_level = self.cassandra_rcl,
                                                             write_consistency_level = self.cassandra_wcl),
                                              memcache = perma_memcache,
                                              lock_factory = self.make_lock)

        self.cache_chains.update(permacache=self.permacache)

        # hardcache is done after the db info is loaded, and then the
        # chains are reset to use the appropriate initial entries

        if self.stalecaches:
            self.cache = StaleCacheChain(localcache_cls(),
                                         CMemcache(self.stalecaches, num_clients=num_mc_clients),
                                         self.memcache)
        else:
            self.cache = MemcacheChain((localcache_cls(), self.memcache))
        self.cache_chains.update(cache=self.cache)

        self.rendercache = MemcacheChain((localcache_cls(),
                                          CMemcache(self.rendercaches,
                                                    noreply=True, no_block=True,
                                                    num_clients = num_mc_clients)))
        self.cache_chains.update(rendercache=self.rendercache)

        self.thing_cache = CacheChain((localcache_cls(),))
        self.cache_chains.update(thing_cache=self.thing_cache)

        #load the database info
        self.dbm = self.load_db_params(global_conf)

        # can't do this until load_db_params() has been called
        self.hardcache = HardcacheChain((localcache_cls(),
                                         self.memcache,
                                         HardCache(self)),
                                        cache_negative_results = True)
        self.cache_chains.update(hardcache=self.hardcache)

        # I know this sucks, but we need non-request-threads to be
        # able to reset the caches, so we need them be able to close
        # around 'cache_chains' without being able to call getattr on
        # 'g'
        cache_chains = self.cache_chains.copy()
        def reset_caches():
            for name, chain in cache_chains.iteritems():
                chain.reset()
                chain.stats = CacheStats(self.stats, name)

        self.reset_caches = reset_caches
        self.reset_caches()

        #make a query cache
        self.stats_collector = QueryStats()

        # set the modwindow
        self.MODWINDOW = timedelta(self.MODWINDOW)

        self.REDDIT_MAIN = bool(os.environ.get('REDDIT_MAIN'))

        origin_prefix = self.domain_prefix + "." if self.domain_prefix else ""
        self.origin = "http://" + origin_prefix + self.domain
        self.secure_domains = set([urlparse(self.payment_domain).netloc])
        
        self.trusted_domains = set([self.domain])
        self.trusted_domains.update(self.authorized_cnames)
        if self.https_endpoint:
            https_url = urlparse(self.https_endpoint)
            self.secure_domains.add(https_url.netloc)
            self.trusted_domains.add(https_url.hostname)


        # load the unique hashed names of files under static
        static_files = os.path.join(self.paths.get('static_files'), 'static')
        names_file_path = os.path.join(static_files, 'names.json')
        if os.path.exists(names_file_path):
            with open(names_file_path) as handle:
                self.static_names = json.load(handle)
        else:
            self.static_names = {}

        #setup the logger
        self.log = logging.getLogger('reddit')
        self.log.addHandler(logging.StreamHandler())
        if self.debug:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.INFO)

        # set log level for pycountry which is chatty
        logging.getLogger('pycountry.db').setLevel(logging.CRITICAL)

        if not self.media_domain:
            self.media_domain = self.domain
        if self.media_domain == self.domain:
            print ("Warning: g.media_domain == g.domain. " +
                   "This may give untrusted content access to user cookies")

        self.reddit_host = socket.gethostname()
        self.reddit_pid  = os.getpid()

        for arg in sys.argv:
            tokens = arg.split("=")
            if len(tokens) == 2:
                k, v = tokens
                self.log.debug("Overriding g.%s to %s" % (k, v))
                setattr(self, k, v)

        #if we're going to use the query_queue, we need amqp
        if self.write_query_queue and not self.amqp_host:
            raise Exception("amqp_host must be defined to use the query queue")

        # This requirement doesn't *have* to be a requirement, but there are
        # bugs at the moment that will pop up if you violate it
        if self.write_query_queue and not self.use_query_cache:
            raise Exception("write_query_queue requires use_query_cache")

        # try to set the source control revision number
        try:
            self.version = subprocess.check_output(["git", "rev-parse", "HEAD"])
        except subprocess.CalledProcessError, e:
            self.log.info("Couldn't read source revision (%r)" % e)
            self.version = self.short_version = '(unknown)'
コード例 #12
0
ファイル: app_globals.py プロジェクト: peakcyber/reddit
    def setup(self, global_conf):
        # heavy load mode is read only mode with a different infobar
        if self.heavy_load_mode:
            self.read_only_mode = True

        if hasattr(signal, 'SIGUSR1'):
            # not all platforms have user signals
            signal.signal(signal.SIGUSR1, thread_dump)

        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components

        localcache_cls = (SelfEmptyingCache
                          if self.running_as_script else LocalCache)
        num_mc_clients = self.num_mc_clients

        self.cache_chains = {}

        self.memcache = CMemcache(self.memcaches, num_clients=num_mc_clients)
        self.make_lock = make_lock_factory(self.memcache)

        self.stats = Stats(global_conf.get('statsd_addr'),
                           global_conf.get('statsd_sample_rate'))

        if not self.cassandra_seeds:
            raise ValueError("cassandra_seeds not set in the .ini")

        keyspace = "reddit"
        self.cassandra_pools = {
            "main":
            StatsCollectingConnectionPool(keyspace,
                                          stats=self.stats,
                                          logging_name="main",
                                          server_list=self.cassandra_seeds,
                                          pool_size=len(self.cassandra_seeds),
                                          timeout=2,
                                          max_retries=3,
                                          prefill=False),
            "noretries":
            StatsCollectingConnectionPool(keyspace,
                                          stats=self.stats,
                                          logging_name="noretries",
                                          server_list=self.cassandra_seeds,
                                          pool_size=len(self.cassandra_seeds),
                                          timeout=.1,
                                          max_retries=0,
                                          prefill=False),
        }

        perma_memcache = (CMemcache(self.permacache_memcaches,
                                    num_clients=num_mc_clients)
                          if self.permacache_memcaches else None)
        self.permacache = CassandraCacheChain(
            localcache_cls(),
            CassandraCache('permacache',
                           self.cassandra_pools[self.cassandra_default_pool],
                           read_consistency_level=self.cassandra_rcl,
                           write_consistency_level=self.cassandra_wcl),
            memcache=perma_memcache,
            lock_factory=self.make_lock)

        self.cache_chains.update(permacache=self.permacache)

        # hardcache is done after the db info is loaded, and then the
        # chains are reset to use the appropriate initial entries

        if self.stalecaches:
            self.cache = StaleCacheChain(
                localcache_cls(),
                CMemcache(self.stalecaches, num_clients=num_mc_clients),
                self.memcache)
        else:
            self.cache = MemcacheChain((localcache_cls(), self.memcache))
        self.cache_chains.update(cache=self.cache)

        self.rendercache = MemcacheChain(
            (localcache_cls(),
             CMemcache(self.rendercaches,
                       noreply=True,
                       no_block=True,
                       num_clients=num_mc_clients)))
        self.cache_chains.update(rendercache=self.rendercache)

        self.servicecache = MemcacheChain(
            (localcache_cls(),
             CMemcache(self.servicecaches, num_clients=num_mc_clients)))
        self.cache_chains.update(servicecache=self.servicecache)

        self.thing_cache = CacheChain((localcache_cls(), ))
        self.cache_chains.update(thing_cache=self.thing_cache)

        #load the database info
        self.dbm = self.load_db_params(global_conf)

        # can't do this until load_db_params() has been called
        self.hardcache = HardcacheChain(
            (localcache_cls(), self.memcache, HardCache(self)),
            cache_negative_results=True)
        self.cache_chains.update(hardcache=self.hardcache)

        # I know this sucks, but we need non-request-threads to be
        # able to reset the caches, so we need them be able to close
        # around 'cache_chains' without being able to call getattr on
        # 'g'
        cache_chains = self.cache_chains.copy()

        def reset_caches():
            for name, chain in cache_chains.iteritems():
                chain.reset()
                chain.stats = CacheStats(self.stats, name)

        self.reset_caches = reset_caches
        self.reset_caches()

        #make a query cache
        self.stats_collector = QueryStats()

        # set the modwindow
        self.MODWINDOW = timedelta(self.MODWINDOW)

        self.REDDIT_MAIN = bool(os.environ.get('REDDIT_MAIN'))

        origin_prefix = self.domain_prefix + "." if self.domain_prefix else ""
        self.origin = "http://" + origin_prefix + self.domain
        self.secure_domains = set([urlparse(self.payment_domain).netloc])

        self.trusted_domains = set([self.domain])
        self.trusted_domains.update(self.authorized_cnames)
        if self.https_endpoint:
            https_url = urlparse(self.https_endpoint)
            self.secure_domains.add(https_url.netloc)
            self.trusted_domains.add(https_url.hostname)

        # load the unique hashed names of files under static
        static_files = os.path.join(self.paths.get('static_files'), 'static')
        names_file_path = os.path.join(static_files, 'names.json')
        if os.path.exists(names_file_path):
            with open(names_file_path) as handle:
                self.static_names = json.load(handle)
        else:
            self.static_names = {}

        #setup the logger
        self.log = logging.getLogger('reddit')
        self.log.addHandler(logging.StreamHandler())
        if self.debug:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.INFO)

        # set log level for pycountry which is chatty
        logging.getLogger('pycountry.db').setLevel(logging.CRITICAL)

        if not self.media_domain:
            self.media_domain = self.domain
        if self.media_domain == self.domain:
            print("Warning: g.media_domain == g.domain. " +
                  "This may give untrusted content access to user cookies")

        self.reddit_host = socket.gethostname()
        self.reddit_pid = os.getpid()

        for arg in sys.argv:
            tokens = arg.split("=")
            if len(tokens) == 2:
                k, v = tokens
                self.log.debug("Overriding g.%s to %s" % (k, v))
                setattr(self, k, v)

        #the shutdown toggle
        self.shutdown = False

        #if we're going to use the query_queue, we need amqp
        if self.write_query_queue and not self.amqp_host:
            raise Exception("amqp_host must be defined to use the query queue")

        # This requirement doesn't *have* to be a requirement, but there are
        # bugs at the moment that will pop up if you violate it
        if self.write_query_queue and not self.use_query_cache:
            raise Exception("write_query_queue requires use_query_cache")

        # try to set the source control revision number
        try:
            self.version = subprocess.check_output(
                ["git", "rev-parse", "HEAD"])
        except subprocess.CalledProcessError, e:
            self.log.info("Couldn't read source revision (%r)" % e)
            self.version = self.short_version = '(unknown)'
コード例 #13
0
ファイル: app_globals.py プロジェクト: whatisaphone/lesswrong
    def __init__(self, global_conf, app_conf, paths, **extra):
        """
        Globals acts as a container for objects available throughout
        the life of the application.

        One instance of Globals is created by Pylons during
        application initialization and is available during requests
        via the 'g' variable.
        
        ``global_conf``
            The same variable used throughout ``config/middleware.py``
            namely, the variables from the ``[DEFAULT]`` section of the
            configuration file.
            
        ``app_conf``
            The same ``kw`` dictionary used throughout
            ``config/middleware.py`` namely, the variables from the
            section in the config file for your application.
            
        ``extra``
            The configuration returned from ``load_config`` in 
            ``config/middleware.py`` which may be of use in the setup of
            your global variables.
            
        """

        def to_bool(x):
            return (x.lower() == 'true') if x else None
        
        def to_iter(name, delim = ','):
            return (x.strip() for x in global_conf.get(name, '').split(delim))


        # slop over all variables to start with
        for k, v in  global_conf.iteritems():
            if not k.startswith("_") and not hasattr(self, k):
                if k in self.int_props:
                    v = int(v)
                elif k in self.bool_props:
                    v = to_bool(v)
                elif k in self.tuple_props:
                    v = tuple(to_iter(k))
                setattr(self, k, v)

        # initialize caches
        mc = Memcache(self.memcaches, debug=True)
        self.cache = CacheChain((LocalCache(), mc))
        self.permacache = Memcache(self.permacaches, debug=True)
        self.rendercache = Memcache(self.rendercaches, debug=True)
        self.make_lock = make_lock_factory(mc)

        self.rec_cache = Memcache(self.rec_cache, debug=True)
        
        # set default time zone if one is not set
        self.tz = pytz.timezone(global_conf.get('timezone'))

        #make a query cache
        self.stats_collector = QueryStats()

        # set the modwindow
        self.MODWINDOW = timedelta(self.MODWINDOW)

        self.REDDIT_MAIN = bool(os.environ.get('REDDIT_MAIN'))

        # turn on for language support
        self.languages, self.lang_name = _get_languages()

        all_languages = self.lang_name.keys()
        all_languages.sort()
        self.all_languages = all_languages

        # load the md5 hashes of files under static
        static_files = os.path.join(paths.get('static_files'), 'static')
        self.static_md5 = {}
        if os.path.exists(static_files):
            for f in os.listdir(static_files):
                if f.endswith('.md5'):
                    key = f[0:-4]
                    f = os.path.join(static_files, f)
                    with open(f, 'r') as handle:
                        md5 = handle.read().strip('\n')
                    self.static_md5[key] = md5


        #set up the logging directory
        log_path = self.log_path
        process_iden = global_conf.get('scgi_port', 'default')
        if log_path:
            if not os.path.exists(log_path):
                os.makedirs(log_path)
            for fname in os.listdir(log_path):
                if fname.startswith(process_iden):
                    full_name = os.path.join(log_path, fname)
                    os.remove(full_name)

        #setup the logger
        self.log = logging.getLogger('reddit')
        self.log.addHandler(logging.StreamHandler())
        if self.debug:
            self.log.setLevel(logging.DEBUG)

        #read in our CSS so that it can become a default for subreddit
        #stylesheets
        stylesheet_path = os.path.join(paths.get('static_files'),
                                       self.static_path.lstrip('/'),
                                       self.stylesheet)
        with open(stylesheet_path) as s:
            self.default_stylesheet = s.read()

        self.reddit_host = socket.gethostname()
        self.reddit_pid  = os.getpid()
コード例 #14
0
ファイル: app_globals.py プロジェクト: Anenome/reddit
    def setup(self):
        # heavy load mode is read only mode with a different infobar
        if self.heavy_load_mode:
            self.read_only_mode = True

        if hasattr(signal, 'SIGUSR1'):
            # not all platforms have user signals
            signal.signal(signal.SIGUSR1, thread_dump)

        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components

        localcache_cls = (SelfEmptyingCache if self.running_as_script
                          else LocalCache)
        num_mc_clients = self.num_mc_clients

        self.cache_chains = {}

        # for now, zookeeper will be an optional part of the stack.
        # if it's not configured, we will grab the expected config from the
        # [live_config] section of the ini file
        zk_hosts = self.config.get("zookeeper_connection_string")
        if zk_hosts:
            from r2.lib.zookeeper import (connect_to_zookeeper,
                                          LiveConfig, LiveList)
            zk_username = self.config["zookeeper_username"]
            zk_password = self.config["zookeeper_password"]
            self.zookeeper = connect_to_zookeeper(zk_hosts, (zk_username,
                                                             zk_password))
            self.live_config = LiveConfig(self.zookeeper, LIVE_CONFIG_NODE)
            self.throttles = LiveList(self.zookeeper, "/throttles",
                                      map_fn=ipaddress.ip_network,
                                      reduce_fn=ipaddress.collapse_addresses)
        else:
            self.zookeeper = None
            parser = ConfigParser.RawConfigParser()
            parser.read([self.config["__file__"]])
            self.live_config = extract_live_config(parser, self.plugins)
            self.throttles = tuple()  # immutable since it's not real

        self.memcache = CMemcache(self.memcaches, num_clients = num_mc_clients)
        self.lock_cache = CMemcache(self.lockcaches, num_clients=num_mc_clients)

        self.stats = Stats(self.config.get('statsd_addr'),
                           self.config.get('statsd_sample_rate'))

        event.listens_for(engine.Engine, 'before_cursor_execute')(
            self.stats.pg_before_cursor_execute)
        event.listens_for(engine.Engine, 'after_cursor_execute')(
            self.stats.pg_after_cursor_execute)

        self.make_lock = make_lock_factory(self.lock_cache, self.stats)

        if not self.cassandra_seeds:
            raise ValueError("cassandra_seeds not set in the .ini")


        keyspace = "reddit"
        self.cassandra_pools = {
            "main":
                StatsCollectingConnectionPool(
                    keyspace,
                    stats=self.stats,
                    logging_name="main",
                    server_list=self.cassandra_seeds,
                    pool_size=self.cassandra_pool_size,
                    timeout=2,
                    max_retries=3,
                    prefill=False
                ),
        }

        perma_memcache = (CMemcache(self.permacache_memcaches, num_clients = num_mc_clients)
                          if self.permacache_memcaches
                          else None)
        self.permacache = CassandraCacheChain(localcache_cls(),
                                              CassandraCache('permacache',
                                                             self.cassandra_pools[self.cassandra_default_pool],
                                                             read_consistency_level = self.cassandra_rcl,
                                                             write_consistency_level = self.cassandra_wcl),
                                              memcache = perma_memcache,
                                              lock_factory = self.make_lock)

        self.cache_chains.update(permacache=self.permacache)

        # hardcache is done after the db info is loaded, and then the
        # chains are reset to use the appropriate initial entries

        if self.stalecaches:
            self.cache = StaleCacheChain(localcache_cls(),
                                         CMemcache(self.stalecaches, num_clients=num_mc_clients),
                                         self.memcache)
        else:
            self.cache = MemcacheChain((localcache_cls(), self.memcache))
        self.cache_chains.update(cache=self.cache)

        self.rendercache = MemcacheChain((localcache_cls(),
                                          CMemcache(self.rendercaches,
                                                    noreply=True, no_block=True,
                                                    num_clients = num_mc_clients)))
        self.cache_chains.update(rendercache=self.rendercache)

        self.thing_cache = CacheChain((localcache_cls(),))
        self.cache_chains.update(thing_cache=self.thing_cache)

        #load the database info
        self.dbm = self.load_db_params()

        # can't do this until load_db_params() has been called
        self.hardcache = HardcacheChain((localcache_cls(),
                                         self.memcache,
                                         HardCache(self)),
                                        cache_negative_results = True)
        self.cache_chains.update(hardcache=self.hardcache)

        # I know this sucks, but we need non-request-threads to be
        # able to reset the caches, so we need them be able to close
        # around 'cache_chains' without being able to call getattr on
        # 'g'
        cache_chains = self.cache_chains.copy()
        def reset_caches():
            for name, chain in cache_chains.iteritems():
                chain.reset()
                chain.stats = CacheStats(self.stats, name)

        self.reset_caches = reset_caches
        self.reset_caches()

        # set the modwindow
        self.MODWINDOW = timedelta(self.MODWINDOW)

        self.REDDIT_MAIN = bool(os.environ.get('REDDIT_MAIN'))

        origin_prefix = self.domain_prefix + "." if self.domain_prefix else ""
        self.origin = "http://" + origin_prefix + self.domain
        self.secure_domains = set([urlparse(self.payment_domain).netloc])
        
        self.trusted_domains = set([self.domain])
        self.trusted_domains.update(self.authorized_cnames)
        if self.https_endpoint:
            https_url = urlparse(self.https_endpoint)
            self.secure_domains.add(https_url.netloc)
            self.trusted_domains.add(https_url.hostname)
        if getattr(self, 'oauth_domain', None):
            self.secure_domains.add(self.oauth_domain)

        # load the unique hashed names of files under static
        static_files = os.path.join(self.paths.get('static_files'), 'static')
        names_file_path = os.path.join(static_files, 'names.json')
        if os.path.exists(names_file_path):
            with open(names_file_path) as handle:
                self.static_names = json.load(handle)
        else:
            self.static_names = {}

        #setup the logger
        self.log = logging.getLogger('reddit')
        self.log.addHandler(logging.StreamHandler())
        if self.debug:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.INFO)

        # set log level for pycountry which is chatty
        logging.getLogger('pycountry.db').setLevel(logging.CRITICAL)

        if not self.media_domain:
            self.media_domain = self.domain
        if self.media_domain == self.domain:
            print ("Warning: g.media_domain == g.domain. " +
                   "This may give untrusted content access to user cookies")

        self.reddit_host = socket.gethostname()
        self.reddit_pid  = os.getpid()

        for arg in sys.argv:
            tokens = arg.split("=")
            if len(tokens) == 2:
                k, v = tokens
                self.log.debug("Overriding g.%s to %s" % (k, v))
                setattr(self, k, v)

        #if we're going to use the query_queue, we need amqp
        if self.write_query_queue and not self.amqp_host:
            raise Exception("amqp_host must be defined to use the query queue")

        # This requirement doesn't *have* to be a requirement, but there are
        # bugs at the moment that will pop up if you violate it
        if self.write_query_queue and not self.use_query_cache:
            raise Exception("write_query_queue requires use_query_cache")

        # try to set the source control revision numbers
        self.versions = {}
        r2_root = os.path.dirname(os.path.dirname(self.paths["root"]))
        r2_gitdir = os.path.join(r2_root, ".git")
        self.short_version = self.record_repo_version("r2", r2_gitdir)

        if I18N_PATH:
            i18n_git_path = os.path.join(os.path.dirname(I18N_PATH), ".git")
            self.record_repo_version("i18n", i18n_git_path)

        if self.log_start:
            self.log.error("reddit app %s:%s started %s at %s" %
                           (self.reddit_host, self.reddit_pid,
                            self.short_version, datetime.now()))
コード例 #15
0
    def setup(self):
        self.queues = queues.declare_queues(self)

        ################# CONFIGURATION
        # AMQP is required
        if not self.amqp_host:
            raise ValueError("amqp_host not set in the .ini")

        # This requirement doesn't *have* to be a requirement, but there are
        # bugs at the moment that will pop up if you violate it
        # XXX: get rid of these options. new query cache is always on.
        if self.write_query_queue and not self.use_query_cache:
            raise Exception("write_query_queue requires use_query_cache")

        if not self.cassandra_seeds:
            raise ValueError("cassandra_seeds not set in the .ini")

        # heavy load mode is read only mode with a different infobar
        if self.heavy_load_mode:
            self.read_only_mode = True

        origin_prefix = self.domain_prefix + "." if self.domain_prefix else ""
        self.origin = "http://" + origin_prefix + self.domain
        self.secure_domains = set([urlparse(self.payment_domain).netloc])

        self.trusted_domains = set([self.domain])
        self.trusted_domains.update(self.authorized_cnames)
        if self.https_endpoint:
            https_url = urlparse(self.https_endpoint)
            self.secure_domains.add(https_url.netloc)
            self.trusted_domains.add(https_url.hostname)
        if getattr(self, 'oauth_domain', None):
            self.secure_domains.add(self.oauth_domain)

        # load the unique hashed names of files under static
        static_files = os.path.join(self.paths.get('static_files'), 'static')
        names_file_path = os.path.join(static_files, 'names.json')
        if os.path.exists(names_file_path):
            with open(names_file_path) as handle:
                self.static_names = json.load(handle)
        else:
            self.static_names = {}

        #setup the logger
        self.log = logging.getLogger('reddit')
        self.log.addHandler(logging.StreamHandler())
        if self.debug:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.INFO)

        # set log level for pycountry which is chatty
        logging.getLogger('pycountry.db').setLevel(logging.CRITICAL)

        if not self.media_domain:
            self.media_domain = self.domain
        if self.media_domain == self.domain:
            print ("Warning: g.media_domain == g.domain. " +
                   "This may give untrusted content access to user cookies")

        for arg in sys.argv:
            tokens = arg.split("=")
            if len(tokens) == 2:
                k, v = tokens
                self.log.debug("Overriding g.%s to %s" % (k, v))
                setattr(self, k, v)

        self.reddit_host = socket.gethostname()
        self.reddit_pid  = os.getpid()

        if hasattr(signal, 'SIGUSR1'):
            # not all platforms have user signals
            signal.signal(signal.SIGUSR1, thread_dump)

        self.startup_timer.intermediate("configuration")

        ################# ZOOKEEPER
        # for now, zookeeper will be an optional part of the stack.
        # if it's not configured, we will grab the expected config from the
        # [live_config] section of the ini file
        zk_hosts = self.config.get("zookeeper_connection_string")
        if zk_hosts:
            from r2.lib.zookeeper import (connect_to_zookeeper,
                                          LiveConfig, LiveList)
            zk_username = self.config["zookeeper_username"]
            zk_password = self.config["zookeeper_password"]
            self.zookeeper = connect_to_zookeeper(zk_hosts, (zk_username,
                                                             zk_password))
            self.live_config = LiveConfig(self.zookeeper, LIVE_CONFIG_NODE)
            self.throttles = LiveList(self.zookeeper, "/throttles",
                                      map_fn=ipaddress.ip_network,
                                      reduce_fn=ipaddress.collapse_addresses)
        else:
            self.zookeeper = None
            parser = ConfigParser.RawConfigParser()
            parser.read([self.config["__file__"]])
            self.live_config = extract_live_config(parser, self.plugins)
            self.throttles = tuple()  # immutable since it's not real
        self.startup_timer.intermediate("zookeeper")

        ################# MEMCACHE
        num_mc_clients = self.num_mc_clients

        # the main memcache pool. used for most everything.
        self.memcache = CMemcache(self.memcaches, num_clients=num_mc_clients)

        # a smaller pool of caches used only for distributed locks.
        # TODO: move this to ZooKeeper
        self.lock_cache = CMemcache(self.lockcaches,
                                    num_clients=num_mc_clients)
        self.make_lock = make_lock_factory(self.lock_cache, self.stats)

        # memcaches used in front of the permacache CF in cassandra.
        # XXX: this is a legacy thing; permacache was made when C* didn't have
        # a row cache.
        if self.permacache_memcaches:
            permacache_memcaches = CMemcache(self.permacache_memcaches,
                                             num_clients=num_mc_clients)
        else:
            permacache_memcaches = None

        # the stalecache is a memcached local to the current app server used
        # for data that's frequently fetched but doesn't need to be fresh.
        if self.stalecaches:
            stalecaches = CMemcache(self.stalecaches,
                                    num_clients=num_mc_clients)
        else:
            stalecaches = None

        # rendercache holds rendered partial templates as well as fully
        # cached pages.
        rendercaches = CMemcache(
            self.rendercaches,
            noreply=True,
            no_block=True,
            num_clients=num_mc_clients,
        )

        self.startup_timer.intermediate("memcache")

        ################# CASSANDRA
        keyspace = "reddit"
        self.cassandra_pools = {
            "main":
                StatsCollectingConnectionPool(
                    keyspace,
                    stats=self.stats,
                    logging_name="main",
                    server_list=self.cassandra_seeds,
                    pool_size=self.cassandra_pool_size,
                    timeout=2,
                    max_retries=3,
                    prefill=False
                ),
        }

        permacache_cf = CassandraCache(
            'permacache',
            self.cassandra_pools[self.cassandra_default_pool],
            read_consistency_level=self.cassandra_rcl,
            write_consistency_level=self.cassandra_wcl
        )

        self.startup_timer.intermediate("cassandra")

        ################# POSTGRES
        event.listens_for(engine.Engine, 'before_cursor_execute')(
            self.stats.pg_before_cursor_execute)
        event.listens_for(engine.Engine, 'after_cursor_execute')(
            self.stats.pg_after_cursor_execute)

        self.dbm = self.load_db_params()
        self.startup_timer.intermediate("postgres")

        ################# CHAINS
        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components
        self.cache_chains = {}
        localcache_cls = (SelfEmptyingCache if self.running_as_script
                          else LocalCache)

        if stalecaches:
            self.cache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                self.memcache,
            )
        else:
            self.cache = MemcacheChain((localcache_cls(), self.memcache))
        self.cache_chains.update(cache=self.cache)

        self.rendercache = MemcacheChain((
            localcache_cls(),
            rendercaches,
        ))
        self.cache_chains.update(rendercache=self.rendercache)

        # the thing_cache is used in tdb_cassandra.
        self.thing_cache = CacheChain((localcache_cls(),))
        self.cache_chains.update(thing_cache=self.thing_cache)

        self.permacache = CassandraCacheChain(
            localcache_cls(),
            permacache_cf,
            memcache=permacache_memcaches,
            lock_factory=self.make_lock,
        )
        self.cache_chains.update(permacache=self.permacache)

        # hardcache is used for various things that tend to expire
        # TODO: replace hardcache w/ cassandra stuff
        self.hardcache = HardcacheChain(
            (localcache_cls(), self.memcache, HardCache(self)),
            cache_negative_results=True,
        )
        self.cache_chains.update(hardcache=self.hardcache)

        # I know this sucks, but we need non-request-threads to be
        # able to reset the caches, so we need them be able to close
        # around 'cache_chains' without being able to call getattr on
        # 'g'
        cache_chains = self.cache_chains.copy()
        def reset_caches():
            for name, chain in cache_chains.iteritems():
                chain.reset()
                chain.stats = CacheStats(self.stats, name)

        self.reset_caches = reset_caches
        self.reset_caches()

        self.startup_timer.intermediate("cache_chains")

        # try to set the source control revision numbers
        self.versions = {}
        r2_root = os.path.dirname(os.path.dirname(self.paths["root"]))
        r2_gitdir = os.path.join(r2_root, ".git")
        self.short_version = self.record_repo_version("r2", r2_gitdir)

        if I18N_PATH:
            i18n_git_path = os.path.join(os.path.dirname(I18N_PATH), ".git")
            self.record_repo_version("i18n", i18n_git_path)

        self.startup_timer.intermediate("revisions")
コード例 #16
0
ファイル: app_globals.py プロジェクト: sshixin/reddit
    def setup(self):
        self.queues = queues.declare_queues(self)

        self.extension_subdomains = dict(
            m="mobile",
            i="compact",
            api="api",
            rss="rss",
            xml="xml",
            json="json",
        )

        ################# PROVIDERS
        self.auth_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.auth",
            self.authentication_provider,
        )
        self.media_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.media",
            self.media_provider,
        )
        self.cdn_provider = select_provider(
            self.config,
            self.pkg_resources_working_set,
            "r2.provider.cdn",
            self.cdn_provider,
        )
        self.startup_timer.intermediate("providers")

        ################# CONFIGURATION
        # AMQP is required
        if not self.amqp_host:
            raise ValueError("amqp_host not set in the .ini")

        if not self.cassandra_seeds:
            raise ValueError("cassandra_seeds not set in the .ini")

        # heavy load mode is read only mode with a different infobar
        if self.heavy_load_mode:
            self.read_only_mode = True

        origin_prefix = self.domain_prefix + "." if self.domain_prefix else ""
        self.origin = "http://" + origin_prefix + self.domain

        self.trusted_domains = set([self.domain])
        if self.https_endpoint:
            https_url = urlparse(self.https_endpoint)
            self.trusted_domains.add(https_url.hostname)

        # load the unique hashed names of files under static
        static_files = os.path.join(self.paths.get('static_files'), 'static')
        names_file_path = os.path.join(static_files, 'names.json')
        if os.path.exists(names_file_path):
            with open(names_file_path) as handle:
                self.static_names = json.load(handle)
        else:
            self.static_names = {}

        # make python warnings go through the logging system
        logging.captureWarnings(capture=True)

        log = logging.getLogger('reddit')

        # when we're a script (paster run) just set up super simple logging
        if self.running_as_script:
            log.setLevel(logging.INFO)
            log.addHandler(logging.StreamHandler())

        # if in debug mode, override the logging level to DEBUG
        if self.debug:
            log.setLevel(logging.DEBUG)

        # attempt to figure out which pool we're in and add that to the
        # LogRecords.
        try:
            with open("/etc/ec2_asg", "r") as f:
                pool = f.read().strip()
            # clean up the pool name since we're putting stuff after "-"
            pool = pool.partition("-")[0]
        except IOError:
            pool = "reddit-app"
        self.log = logging.LoggerAdapter(log, {"pool": pool})

        # set locations
        locations = pkg_resources.resource_stream(__name__,
                                                  "../data/locations.json")
        self.locations = json.loads(locations.read())

        if not self.media_domain:
            self.media_domain = self.domain
        if self.media_domain == self.domain:
            print >> sys.stderr, (
                "Warning: g.media_domain == g.domain. " +
                "This may give untrusted content access to user cookies")
        if self.oauth_domain == self.domain:
            print >> sys.stderr, ("Warning: g.oauth_domain == g.domain. "
                                  "CORS requests to g.domain will be allowed")

        for arg in sys.argv:
            tokens = arg.split("=")
            if len(tokens) == 2:
                k, v = tokens
                self.log.debug("Overriding g.%s to %s" % (k, v))
                setattr(self, k, v)

        self.reddit_host = socket.gethostname()
        self.reddit_pid = os.getpid()

        if hasattr(signal, 'SIGUSR1'):
            # not all platforms have user signals
            signal.signal(signal.SIGUSR1, thread_dump)

        locale.setlocale(locale.LC_ALL, self.locale)

        # Pre-calculate ratelimit values
        self.RL_RESET_SECONDS = self.config["RL_RESET_MINUTES"] * 60
        self.RL_MAX_REQS = int(self.config["RL_AVG_REQ_PER_SEC"] *
                               self.RL_RESET_SECONDS)

        self.RL_OAUTH_RESET_SECONDS = self.config["RL_OAUTH_RESET_MINUTES"] * 60
        self.RL_OAUTH_MAX_REQS = int(self.config["RL_OAUTH_AVG_REQ_PER_SEC"] *
                                     self.RL_OAUTH_RESET_SECONDS)

        self.RL_LOGIN_MAX_REQS = int(self.config["RL_LOGIN_AVG_PER_SEC"] *
                                     self.RL_RESET_SECONDS)

        self.startup_timer.intermediate("configuration")

        ################# ZOOKEEPER
        # for now, zookeeper will be an optional part of the stack.
        # if it's not configured, we will grab the expected config from the
        # [live_config] section of the ini file
        zk_hosts = self.config.get("zookeeper_connection_string")
        if zk_hosts:
            from r2.lib.zookeeper import (connect_to_zookeeper, LiveConfig,
                                          LiveList)
            zk_username = self.config["zookeeper_username"]
            zk_password = self.config["zookeeper_password"]
            self.zookeeper = connect_to_zookeeper(zk_hosts,
                                                  (zk_username, zk_password))
            self.live_config = LiveConfig(self.zookeeper, LIVE_CONFIG_NODE)
            self.secrets = fetch_secrets(self.zookeeper)
            self.throttles = LiveList(self.zookeeper,
                                      "/throttles",
                                      map_fn=ipaddress.ip_network,
                                      reduce_fn=ipaddress.collapse_addresses)

            # close our zk connection when the app shuts down
            SHUTDOWN_CALLBACKS.append(self.zookeeper.stop)
        else:
            self.zookeeper = None
            parser = ConfigParser.RawConfigParser()
            parser.optionxform = str
            parser.read([self.config["__file__"]])
            self.live_config = extract_live_config(parser, self.plugins)
            self.secrets = extract_secrets(parser)
            self.throttles = tuple()  # immutable since it's not real

        self.startup_timer.intermediate("zookeeper")

        ################# PRIVILEGED USERS
        self.admins = PermissionFilteredEmployeeList(self.live_config,
                                                     type="admin")
        self.sponsors = PermissionFilteredEmployeeList(self.live_config,
                                                       type="sponsor")
        self.employees = PermissionFilteredEmployeeList(self.live_config,
                                                        type="employee")

        ################# MEMCACHE
        num_mc_clients = self.num_mc_clients

        # the main memcache pool. used for most everything.
        memcache = CMemcache(
            self.memcaches,
            min_compress_len=1400,
            num_clients=num_mc_clients,
            binary=True,
        )

        # a pool just used for @memoize results
        memoizecaches = CMemcache(
            self.memoizecaches,
            min_compress_len=50 * 1024,
            num_clients=num_mc_clients,
            binary=True,
        )

        # a pool just for srmember rels
        srmembercaches = CMemcache(
            self.srmembercaches,
            min_compress_len=96,
            num_clients=num_mc_clients,
            binary=True,
        )

        # a pool just for rels
        relcaches = CMemcache(
            self.relcaches,
            min_compress_len=96,
            num_clients=num_mc_clients,
            binary=True,
        )

        ratelimitcaches = CMemcache(
            self.ratelimitcaches,
            min_compress_len=96,
            num_clients=num_mc_clients,
        )

        # a smaller pool of caches used only for distributed locks.
        # TODO: move this to ZooKeeper
        self.lock_cache = CMemcache(self.lockcaches,
                                    binary=True,
                                    num_clients=num_mc_clients)
        self.make_lock = make_lock_factory(self.lock_cache, self.stats)

        # memcaches used in front of the permacache CF in cassandra.
        # XXX: this is a legacy thing; permacache was made when C* didn't have
        # a row cache.
        permacache_memcaches = CMemcache(self.permacache_memcaches,
                                         min_compress_len=1400,
                                         num_clients=num_mc_clients)

        # the stalecache is a memcached local to the current app server used
        # for data that's frequently fetched but doesn't need to be fresh.
        if self.stalecaches:
            stalecaches = CMemcache(self.stalecaches,
                                    binary=True,
                                    num_clients=num_mc_clients)
        else:
            stalecaches = None

        # rendercache holds rendered partial templates.
        rendercaches = CMemcache(
            self.rendercaches,
            noreply=True,
            no_block=True,
            num_clients=num_mc_clients,
            min_compress_len=480,
        )

        # pagecaches hold fully rendered pages
        pagecaches = CMemcache(
            self.pagecaches,
            noreply=True,
            no_block=True,
            num_clients=num_mc_clients,
            min_compress_len=1400,
        )

        self.startup_timer.intermediate("memcache")

        ################# CASSANDRA
        keyspace = "reddit"
        self.cassandra_pools = {
            "main":
            StatsCollectingConnectionPool(keyspace,
                                          stats=self.stats,
                                          logging_name="main",
                                          server_list=self.cassandra_seeds,
                                          pool_size=self.cassandra_pool_size,
                                          timeout=4,
                                          max_retries=3,
                                          prefill=False),
        }

        permacache_cf = Permacache._setup_column_family(
            'permacache',
            self.cassandra_pools[self.cassandra_default_pool],
        )

        self.startup_timer.intermediate("cassandra")

        ################# POSTGRES
        self.dbm = self.load_db_params()
        self.startup_timer.intermediate("postgres")

        ################# CHAINS
        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components
        cache_chains = {}
        localcache_cls = (SelfEmptyingCache
                          if self.running_as_script else LocalCache)

        if stalecaches:
            self.cache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                memcache,
            )
        else:
            self.cache = MemcacheChain((localcache_cls(), memcache))
        cache_chains.update(cache=self.cache)

        if stalecaches:
            self.memoizecache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                memoizecaches,
            )
        else:
            self.memoizecache = MemcacheChain(
                (localcache_cls(), memoizecaches))
        cache_chains.update(memoizecache=self.memoizecache)

        if stalecaches:
            self.srmembercache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                srmembercaches,
            )
        else:
            self.srmembercache = MemcacheChain(
                (localcache_cls(), srmembercaches))
        cache_chains.update(srmembercache=self.srmembercache)

        if stalecaches:
            self.relcache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                relcaches,
            )
        else:
            self.relcache = MemcacheChain((localcache_cls(), relcaches))
        cache_chains.update(relcache=self.relcache)

        self.ratelimitcache = MemcacheChain(
            (localcache_cls(), ratelimitcaches))
        cache_chains.update(ratelimitcache=self.ratelimitcache)

        self.rendercache = MemcacheChain((
            localcache_cls(),
            rendercaches,
        ))
        cache_chains.update(rendercache=self.rendercache)

        self.pagecache = MemcacheChain((
            localcache_cls(),
            pagecaches,
        ))
        cache_chains.update(pagecache=self.pagecache)

        # the thing_cache is used in tdb_cassandra.
        self.thing_cache = CacheChain((localcache_cls(), ), check_keys=False)
        cache_chains.update(thing_cache=self.thing_cache)

        if stalecaches:
            permacache_cache = StaleCacheChain(
                localcache_cls(),
                stalecaches,
                permacache_memcaches,
                check_keys=False,
            )
        else:
            permacache_cache = CacheChain(
                (localcache_cls(), permacache_memcaches),
                check_keys=False,
            )
        cache_chains.update(permacache=permacache_cache)

        self.permacache = Permacache(
            permacache_cache,
            permacache_cf,
            lock_factory=self.make_lock,
        )

        # hardcache is used for various things that tend to expire
        # TODO: replace hardcache w/ cassandra stuff
        self.hardcache = HardcacheChain(
            (localcache_cls(), memcache, HardCache(self)),
            cache_negative_results=True,
        )
        cache_chains.update(hardcache=self.hardcache)

        # I know this sucks, but we need non-request-threads to be
        # able to reset the caches, so we need them be able to close
        # around 'cache_chains' without being able to call getattr on
        # 'g'
        def reset_caches():
            for name, chain in cache_chains.iteritems():
                chain.reset()
                if isinstance(chain, StaleCacheChain):
                    chain.stats = StaleCacheStats(self.stats, name)
                else:
                    chain.stats = CacheStats(self.stats, name)

        self.cache_chains = cache_chains

        self.reset_caches = reset_caches
        self.reset_caches()

        self.startup_timer.intermediate("cache_chains")

        # try to set the source control revision numbers
        self.versions = {}
        r2_root = os.path.dirname(os.path.dirname(self.paths["root"]))
        r2_gitdir = os.path.join(r2_root, ".git")
        self.short_version = self.record_repo_version("r2", r2_gitdir)

        if I18N_PATH:
            i18n_git_path = os.path.join(os.path.dirname(I18N_PATH), ".git")
            self.record_repo_version("i18n", i18n_git_path)

        self.startup_timer.intermediate("revisions")
コード例 #17
0
ファイル: app_globals.py プロジェクト: kevinrose/diggit
    def __init__(self, global_conf, app_conf, paths, **extra):
        """
        Globals acts as a container for objects available throughout
        the life of the application.

        One instance of Globals is created by Pylons during
        application initialization and is available during requests
        via the 'g' variable.
        
        ``global_conf``
            The same variable used throughout ``config/middleware.py``
            namely, the variables from the ``[DEFAULT]`` section of the
            configuration file.
            
        ``app_conf``
            The same ``kw`` dictionary used throughout
            ``config/middleware.py`` namely, the variables from the
            section in the config file for your application.
            
        ``extra``
            The configuration returned from ``load_config`` in 
            ``config/middleware.py`` which may be of use in the setup of
            your global variables.
            
        """

        # slop over all variables to start with
        for k, v in global_conf.iteritems():
            if not k.startswith("_") and not hasattr(self, k):
                if k in self.int_props:
                    v = int(v)
                elif k in self.float_props:
                    v = float(v)
                elif k in self.bool_props:
                    v = self.to_bool(v)
                elif k in self.tuple_props:
                    v = tuple(self.to_iter(v))
                setattr(self, k, v)

        self.paid_sponsors = set(x.lower() for x in self.paid_sponsors)

        # initialize caches
        mc = Memcache(self.memcaches, pickleProtocol=1)
        self.memcache = mc
        self.cache = CacheChain((LocalCache(), mc))
        self.permacache = Memcache(self.permacaches, pickleProtocol=1)
        self.rendercache = Memcache(self.rendercaches, pickleProtocol=1)
        self.make_lock = make_lock_factory(mc)

        self.rec_cache = Memcache(self.rec_cache, pickleProtocol=1)

        # set default time zone if one is not set
        tz = global_conf.get("timezone")
        dtz = global_conf.get("display_timezone", tz)

        self.tz = pytz.timezone(tz)
        self.display_tz = pytz.timezone(dtz)

        # load the database info
        self.dbm = self.load_db_params(global_conf)

        # make a query cache
        self.stats_collector = QueryStats()

        # set the modwindow
        self.MODWINDOW = timedelta(self.MODWINDOW)

        self.REDDIT_MAIN = bool(os.environ.get("REDDIT_MAIN"))

        # turn on for language support
        self.languages, self.lang_name = get_active_langs(default_lang=self.lang)

        all_languages = self.lang_name.keys()
        all_languages.sort()
        self.all_languages = all_languages

        # load the md5 hashes of files under static
        static_files = os.path.join(paths.get("static_files"), "static")
        self.static_md5 = {}
        if os.path.exists(static_files):
            for f in os.listdir(static_files):
                if f.endswith(".md5"):
                    key = f.strip(".md5")
                    f = os.path.join(static_files, f)
                    with open(f, "r") as handle:
                        md5 = handle.read().strip("\n")
                    self.static_md5[key] = md5

        # set up the logging directory
        log_path = self.log_path
        process_iden = global_conf.get("scgi_port", "default")
        if log_path:
            if not os.path.exists(log_path):
                os.makedirs(log_path)
            for fname in os.listdir(log_path):
                if fname.startswith(process_iden):
                    full_name = os.path.join(log_path, fname)
                    os.remove(full_name)

        # setup the logger
        self.log = logging.getLogger("digg")
        self.log.addHandler(logging.StreamHandler())
        if self.debug:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.WARNING)

        # set log level for pycountry which is chatty
        logging.getLogger("pycountry.db").setLevel(logging.CRITICAL)

        if not self.media_domain:
            self.media_domain = self.domain
        if self.media_domain == self.domain:
            print "Warning: g.media_domain == g.domain. This may give untrusted content access to user cookies"

        # read in our CSS so that it can become a default for subdigg
        # stylesheets
        stylesheet_path = os.path.join(paths.get("static_files"), self.static_path.lstrip("/"), self.stylesheet)
        with open(stylesheet_path) as s:
            self.default_stylesheet = s.read()

        self.digg_host = socket.gethostname()
        self.digg_pid = os.getpid()

        # the shutdown toggle
        self.shutdown = False

        # if we're going to use the query_queue, we need amqp
        if self.write_query_queue and not self.amqp_host:
            raise Exception("amqp_host must be defined to use the query queue")