Example 1
    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.client = connect_thread_local('Keyspace1', credentials=credentials)

        self.cf       = ColumnFamily(self.client, 'Standard2')

        self.cf_long  = ColumnFamily(self.client, 'StdLong')
        self.cf_int   = ColumnFamily(self.client, 'StdInteger')
        self.cf_time  = ColumnFamily(self.client, 'StdTimeUUID')
        self.cf_lex   = ColumnFamily(self.client, 'StdLexicalUUID')
        self.cf_ascii = ColumnFamily(self.client, 'StdAscii')
        self.cf_utf8  = ColumnFamily(self.client, 'StdUTF8')
        self.cf_bytes = ColumnFamily(self.client, 'StdBytes')

        self.cf_suplong  = ColumnFamily(self.client, 'SuperLong', super=True)
        self.cf_supint   = ColumnFamily(self.client, 'SuperInt', super=True)
        self.cf_suptime  = ColumnFamily(self.client, 'SuperTime', super=True)
        self.cf_suplex   = ColumnFamily(self.client, 'SuperLex', super=True)
        self.cf_supascii = ColumnFamily(self.client, 'SuperAscii', super=True)
        self.cf_suputf8  = ColumnFamily(self.client, 'SuperUTF8', super=True)
        self.cf_supbytes = ColumnFamily(self.client, 'SuperBytes', super=True)

        self.cf_suplong_sublong  = ColumnFamily(self.client, 'SuperLongSubLong', super=True)
        self.cf_suplong_subint   = ColumnFamily(self.client, 'SuperLongSubInt', super=True)
        self.cf_suplong_subtime  = ColumnFamily(self.client, 'SuperLongSubTime', super=True)
        self.cf_suplong_sublex   = ColumnFamily(self.client, 'SuperLongSubLex', super=True)
        self.cf_suplong_subascii = ColumnFamily(self.client, 'SuperLongSubAscii', super=True)
        self.cf_suplong_subutf8  = ColumnFamily(self.client, 'SuperLongSubUTF8', super=True)
        self.cf_suplong_subbytes = ColumnFamily(self.client, 'SuperLongSubBytes', super=True)

        self.cf_valid_long = ColumnFamily(self.client, 'ValidatorLong')
        self.cf_valid_int = ColumnFamily(self.client, 'ValidatorInt')
        self.cf_valid_time = ColumnFamily(self.client, 'ValidatorTime')
        self.cf_valid_lex = ColumnFamily(self.client, 'ValidatorLex')
        self.cf_valid_ascii = ColumnFamily(self.client, 'ValidatorAscii')
        self.cf_valid_utf8 = ColumnFamily(self.client, 'ValidatorUTF8')
        self.cf_valid_bytes = ColumnFamily(self.client, 'ValidatorBytes')

        self.cfs = [self.cf_long, self.cf_int, self.cf_time, self.cf_lex,
                    self.cf_ascii, self.cf_utf8, self.cf_bytes,
                    self.cf_suplong, self.cf_supint, self.cf_suptime,
                    self.cf_suplex, self.cf_supascii, self.cf_suputf8,
                    self.cf_supbytes,
                    self.cf_suplong_sublong, self.cf_suplong_subint,
                    self.cf_suplong_subtime, self.cf_suplong_sublex,
                    self.cf_suplong_subascii, self.cf_suplong_subutf8,
                    self.cf_suplong_subbytes,
                    self.cf_valid_long, self.cf_valid_int, self.cf_valid_time,
                    self.cf_valid_lex, self.cf_valid_ascii, self.cf_valid_utf8,
                    self.cf_valid_bytes]

        try:
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()
Example 2
def test_connections():
    version = connect("Keyspace1").describe_version()

    thread_local = connect_thread_local("Keyspace1")
    threads = []
    for i in xrange(10):
        threads.append(threading.Thread(target=version_check, args=(thread_local, version)))
        threads[-1].start()
    for thread in threads:
        thread.join()
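
Note: this snippet and the similar ones in Examples 3 and 6 call a version_check helper that is defined outside the excerpt. A minimal sketch, assuming the same semantics as the inline definition shown in Examples 11 and 13:

def version_check(connection, version):
    # Every worker thread should observe the same server version
    # through the shared thread-local connection.
    assert connection.describe_version() == version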
Example 3
def test_connections():
    version = connect().get_string_property('version')

    thread_local = connect_thread_local()
    threads = []
    for i in xrange(10):
        threads.append(threading.Thread(target=version_check,
                                        args=(thread_local, version)))
        threads[-1].start()
    for thread in threads:
        thread.join()
Example 4
    def __init__(self, keyspace, column_family, seeds,
                 read_consistency_level=CL_ONE,
                 write_consistency_level=CL_ONE):
        self.keyspace = keyspace
        self.column_family = column_family
        self.seeds = seeds
        self.client = pycassa.connect_thread_local(seeds)
        self.cf = pycassa.ColumnFamily(self.client, self.keyspace,
                                       self.column_family,
                                       read_consistency_level=read_consistency_level,
                                       write_consistency_level=write_consistency_level)
Example 5
    def __init__(self, keyspace, column_family, seeds,
                 read_consistency_level=CL_QUORUM,
                 write_consistency_level=CL_QUORUM):
        self.keyspace = keyspace
        self.column_family = column_family
        self.seeds = seeds
        self.client = pycassa.connect_thread_local(seeds)
        self.cf = pycassa.ColumnFamily(self.client, self.keyspace,
                                       self.column_family,
                                       read_consistency_level=read_consistency_level,
                                       write_consistency_level=write_consistency_level)
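
For context, a hypothetical usage sketch for the wrapper whose __init__ appears in Examples 4 and 5; the class name CassandraStore, the keyspace, the column family, and the seed address are all illustrative placeholders, not names from the original source:

# Hypothetical: assumes the __init__ above belongs to a class named
# CassandraStore; keyspace, column family, and seeds are placeholders.
store = CassandraStore('Keyspace1', 'Standard1', ['127.0.0.1:9160'])
store.cf.insert('row_key', {'column': 'value'})  # pycassa ColumnFamily API
print store.cf.get('row_key')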
Example 6
def test_connections():
    version = connect().get_string_property('version')

    thread_local = connect_thread_local()
    threads = []
    for i in xrange(10):
        threads.append(
            threading.Thread(target=version_check,
                             args=(thread_local, version)))
        threads[-1].start()
    for thread in threads:
        thread.join()
Example 7
    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.client = connect_thread_local('Keyspace1', credentials=credentials)
        self.cf = ColumnFamily(self.client, 'Super2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               buffer_size=2, timestamp=self.timestamp,
                               super=True)

        try:
            self.timestamp_n = int(self.cf.get('meta')['meta']['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()
Example 8
    def setUp(self):
        self.client = connect_thread_local()
        self.client.login('Keyspace1', {'username': '******', 'password': '******'})
        self.cf = ColumnFamily(self.client, 'Keyspace1', 'Super2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               buffer_size=2, timestamp=self.timestamp,
                               super=True)

        try:
            self.timestamp_n = int(self.cf.get('meta')['meta']['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()
Example 9
    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.client = connect_thread_local('Keyspace1', credentials=credentials)
        self.cf = ColumnFamily(self.client, 'Super2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               timestamp=self.timestamp,
                               super=True)
        self.map = ColumnFamilyMap(TestUTF8, self.cf)
        try:
            self.timestamp_n = int(self.cf.get('meta')['meta']['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()
Example 10
    def setUp(self):
        self.client = connect_thread_local()
        self.client.login('Keyspace1', {'username': '******', 'password': '******'})
        self.cf = ColumnFamily(self.client, 'Keyspace1', 'Super2',
                               write_consistency_level=ConsistencyLevel.ONE,
                               timestamp=self.timestamp,
                               super=True)
        self.map = ColumnFamilyMap(TestUTF8, self.cf)
        try:
            self.timestamp_n = int(self.cf.get('meta')['meta']['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()
Example 11
    def test_connections(self):
        def version_check(connection, version):
            assert connection.describe_version() == version

        version = connect('Keyspace1').describe_version()

        thread_local = connect_thread_local('Keyspace1')
        threads = []
        for i in xrange(10):
            threads.append(threading.Thread(target=version_check,
                                            args=(thread_local, version)))
            threads[-1].start()
        for thread in threads:
            thread.join()
Example 12
    def __init__(self, init_args):
        import pycassa
        import cassandra.ttypes

        seed, keyspace, column_family, seen_cf = init_args.split(',')
        self.seeds = [seed]
        self.keyspace = keyspace
        self.column_family = column_family

        self.client = pycassa.connect_thread_local(self.seeds)
        self.cf = pycassa.ColumnFamily(self.client, self.keyspace,
                                       self.column_family)
        self.seen_cf = pycassa.ColumnFamily(self.client, self.keyspace,
                                            seen_cf)
Example 13
    def test_connections(self):
        def version_check(connection, version):
            assert connection.describe_version() == version

        version = connect('Keyspace1').describe_version()

        thread_local = connect_thread_local('Keyspace1')
        threads = []
        for i in xrange(10):
            threads.append(
                threading.Thread(target=version_check,
                                 args=(thread_local, version)))
            threads[-1].start()
        for thread in threads:
            thread.join()
Example 14
def get_client(keyspace):
    config = settings.get_config()

    logger.info("get_client: servers: %s, timeout: %s, user: %s, pass: %s" % (
        config.servers,
        config.connection_timeout,
        config.username,
        config.password))

    client = pycassa.connect_thread_local(
                config.servers,
                timeout=config.connection_timeout
            )

    cred = dict(username=config.username, password=config.password)
    client.login(keyspace, cred)
    logger.info("get_client => %s" % repr(client))
    return client
Example 15
    def setUp(self):
        credentials = {'username': '******', 'password': '******'}
        self.client = connect_thread_local('Keyspace1',
                                           credentials=credentials)

        self.cf = ColumnFamily(self.client, 'Standard2')

        self.cf_long = ColumnFamily(self.client, 'StdLong')
        self.cf_int = ColumnFamily(self.client, 'StdInteger')
        self.cf_time = ColumnFamily(self.client, 'StdTimeUUID')
        self.cf_lex = ColumnFamily(self.client, 'StdLexicalUUID')
        self.cf_ascii = ColumnFamily(self.client, 'StdAscii')
        self.cf_utf8 = ColumnFamily(self.client, 'StdUTF8')
        self.cf_bytes = ColumnFamily(self.client, 'StdBytes')

        self.cf_suplong = ColumnFamily(self.client, 'SuperLong', super=True)
        self.cf_supint = ColumnFamily(self.client, 'SuperInt', super=True)
        self.cf_suptime = ColumnFamily(self.client, 'SuperTime', super=True)
        self.cf_suplex = ColumnFamily(self.client, 'SuperLex', super=True)
        self.cf_supascii = ColumnFamily(self.client, 'SuperAscii', super=True)
        self.cf_suputf8 = ColumnFamily(self.client, 'SuperUTF8', super=True)
        self.cf_supbytes = ColumnFamily(self.client, 'SuperBytes', super=True)

        self.cf_suplong_sublong = ColumnFamily(self.client,
                                               'SuperLongSubLong',
                                               super=True)
        self.cf_suplong_subint = ColumnFamily(self.client,
                                              'SuperLongSubInt',
                                              super=True)
        self.cf_suplong_subtime = ColumnFamily(self.client,
                                               'SuperLongSubTime',
                                               super=True)
        self.cf_suplong_sublex = ColumnFamily(self.client,
                                              'SuperLongSubLex',
                                              super=True)
        self.cf_suplong_subascii = ColumnFamily(self.client,
                                                'SuperLongSubAscii',
                                                super=True)
        self.cf_suplong_subutf8 = ColumnFamily(self.client,
                                               'SuperLongSubUTF8',
                                               super=True)
        self.cf_suplong_subbytes = ColumnFamily(self.client,
                                                'SuperLongSubBytes',
                                                super=True)

        self.cf_valid_long = ColumnFamily(self.client, 'ValidatorLong')
        self.cf_valid_int = ColumnFamily(self.client, 'ValidatorInt')
        self.cf_valid_time = ColumnFamily(self.client, 'ValidatorTime')
        self.cf_valid_lex = ColumnFamily(self.client, 'ValidatorLex')
        self.cf_valid_ascii = ColumnFamily(self.client, 'ValidatorAscii')
        self.cf_valid_utf8 = ColumnFamily(self.client, 'ValidatorUTF8')
        self.cf_valid_bytes = ColumnFamily(self.client, 'ValidatorBytes')

        self.cf_def_valid = ColumnFamily(self.client, 'DefaultValidator')

        self.cfs = [
            self.cf_long,
            self.cf_int,
            self.cf_time,
            self.cf_lex,
            self.cf_ascii,
            self.cf_utf8,
            self.cf_bytes,
            #
            self.cf_suplong,
            self.cf_supint,
            self.cf_suptime,
            self.cf_suplex,
            self.cf_supascii,
            self.cf_suputf8,
            self.cf_supbytes,
            #
            self.cf_suplong_sublong,
            self.cf_suplong_subint,
            self.cf_suplong_subtime,
            self.cf_suplong_sublex,
            self.cf_suplong_subascii,
            self.cf_suplong_subutf8,
            self.cf_suplong_subbytes,
            #
            self.cf_valid_long,
            self.cf_valid_int,
            self.cf_valid_time,
            self.cf_valid_lex,
            self.cf_valid_ascii,
            self.cf_valid_utf8,
            self.cf_valid_bytes,
            #
            self.cf_def_valid,
        ]

        try:
            self.timestamp_n = int(self.cf.get('meta')['timestamp'])
        except NotFoundException:
            self.timestamp_n = 0
        self.clear()
Example 16
    def __init__(self, global_conf, app_conf, paths, **extra):
        """
        Globals acts as a container for objects available throughout
        the life of the application.

        One instance of Globals is created by Pylons during
        application initialization and is available during requests
        via the 'g' variable.

        ``global_conf``
            The same variable used throughout ``config/middleware.py``,
            namely the variables from the ``[DEFAULT]`` section of the
            configuration file.

        ``app_conf``
            The same ``kw`` dictionary used throughout
            ``config/middleware.py``, namely the variables from the
            section in the config file for your application.

        ``extra``
            The configuration returned from ``load_config`` in
            ``config/middleware.py``, which may be of use in the setup of
            your global variables.

        """

        # slop over all variables to start with
        for k, v in global_conf.iteritems():
            if not k.startswith("_") and not hasattr(self, k):
                if k in self.int_props:
                    v = int(v)
                elif k in self.float_props:
                    v = float(v)
                elif k in self.bool_props:
                    v = self.to_bool(v)
                elif k in self.tuple_props:
                    v = tuple(self.to_iter(v))
                elif k in self.choice_props:
                    if v not in self.choice_props[k]:
                        raise ValueError("Unknown option for %r: %r not in %r"
                                         % (k, v, self.choice_props[k]))
                    v = self.choice_props[k][v]
                setattr(self, k, v)

        self.running_as_script = global_conf.get('running_as_script', False)

        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components

        localcache_cls = (SelfEmptyingCache if self.running_as_script
                          else LocalCache)
        num_mc_clients = self.num_mc_clients

        self.cache_chains = []

        self.memcache = CMemcache(self.memcaches, num_clients = num_mc_clients)
        self.make_lock = make_lock_factory(self.memcache)

        if not self.cassandra_seeds:
            raise ValueError("cassandra_seeds not set in the .ini")
        if not self.url_seeds:
            raise ValueError("url_seeds not set in the .ini")
        self.cassandra_seeds = list(self.cassandra_seeds)
        random.shuffle(self.cassandra_seeds)
        self.cassandra = pycassa.connect_thread_local(self.cassandra_seeds)
        perma_memcache = (CMemcache(self.permacache_memcaches, num_clients = num_mc_clients)
                          if self.permacache_memcaches
                          else None)
        self.permacache = self.init_cass_cache('permacache', 'permacache',
                                               self.cassandra,
                                               self.make_lock,
                                               memcache = perma_memcache,
                                               read_consistency_level = self.cassandra_rcl,
                                               write_consistency_level = self.cassandra_wcl,
                                               localcache_cls = localcache_cls)
        self.cache_chains.append(self.permacache)

        self.url_seeds = list(self.url_seeds)
        random.shuffle(self.url_seeds)
        self.url_cassandra = pycassa.connect_thread_local(self.url_seeds)
        self.urlcache = self.init_cass_cache('urls', 'urls',
                                             self.url_cassandra,
                                             self.make_lock,
                                             # until we've merged this
                                             # with the regular
                                             # cluster, this will
                                             # always be CL_ONE
                                             read_consistency_level = CL_ONE,
                                             write_consistency_level = CL_ONE,
                                             localcache_cls = localcache_cls)
        self.cache_chains.append(self.urlcache)

        # hardcache is done after the db info is loaded, and then the
        # chains are reset to use the appropriate initial entries

        self.cache = MemcacheChain((localcache_cls(), self.memcache))
        self.cache_chains.append(self.cache)

        self.rendercache = MemcacheChain((localcache_cls(),
                                          CMemcache(self.rendercaches,
                                                    noreply=True, no_block=True,
                                                    num_clients = num_mc_clients)))
        self.cache_chains.append(self.rendercache)

        self.servicecache = MemcacheChain((localcache_cls(),
                                           CMemcache(self.servicecaches,
                                                     num_clients = num_mc_clients)))
        self.cache_chains.append(self.servicecache)

        self.thing_cache = CacheChain((localcache_cls(),))
        self.cache_chains.append(self.thing_cache)

        # set default time zone if one is not set
        tz = global_conf.get('timezone')
        dtz = global_conf.get('display_timezone', tz)

        self.tz = pytz.timezone(tz)
        self.display_tz = pytz.timezone(dtz)

        #load the database info
        self.dbm = self.load_db_params(global_conf)

        # can't do this until load_db_params() has been called
        self.hardcache = HardcacheChain((localcache_cls(),
                                         self.memcache,
                                         HardCache(self)),
                                        cache_negative_results = True)
        self.cache_chains.append(self.hardcache)

        # I know this sucks, but we need non-request-threads to be
        # able to reset the caches, so we need them to be able to close
        # around 'cache_chains' without being able to call getattr on
        # 'g'
        cache_chains = self.cache_chains[::]
        def reset_caches():
            for chain in cache_chains:
                chain.reset()

        self.reset_caches = reset_caches
        self.reset_caches()

        #make a query cache
        self.stats_collector = QueryStats()

        # set the modwindow
        self.MODWINDOW = timedelta(self.MODWINDOW)

        self.REDDIT_MAIN = bool(os.environ.get('REDDIT_MAIN'))

        # turn on for language support
        self.languages, self.lang_name = \
                        get_active_langs(default_lang=self.lang)

        all_languages = self.lang_name.keys()
        all_languages.sort()
        self.all_languages = all_languages

        self.paths = paths

        # load the md5 hashes of files under static
        static_files = os.path.join(paths.get('static_files'), 'static')
        self.static_md5 = {}
        if os.path.exists(static_files):
            for f in os.listdir(static_files):
                if f.endswith('.md5'):
                    key = f[0:-4]
                    f = os.path.join(static_files, f)
                    with open(f, 'r') as handle:
                        md5 = handle.read().strip('\n')
                    self.static_md5[key] = md5


        #set up the logging directory
        log_path = self.log_path
        process_iden = global_conf.get('scgi_port', 'default')
        self.reddit_port = process_iden
        if log_path:
            if not os.path.exists(log_path):
                os.makedirs(log_path)
            for fname in os.listdir(log_path):
                if fname.startswith(process_iden):
                    full_name = os.path.join(log_path, fname)
                    os.remove(full_name)

        #setup the logger
        self.log = logging.getLogger('reddit')
        self.log.addHandler(logging.StreamHandler())
        if self.debug:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.INFO)

        # set log level for pycountry which is chatty
        logging.getLogger('pycountry.db').setLevel(logging.CRITICAL)

        if not self.media_domain:
            self.media_domain = self.domain
        #if self.media_domain == self.domain:
            #print ("Warning: g.media_domain == g.domain. " +
            #       "This may give untrusted content access to user cookies")

        #read in our CSS so that it can become a default for subreddit
        #stylesheets
        stylesheet_path = os.path.join(paths.get('static_files'),
                                       self.static_path.lstrip('/'),
                                       self.stylesheet)
        with open(stylesheet_path) as s:
            self.default_stylesheet = s.read()

        self.profanities = None
        if self.profanity_wordlist and os.path.exists(self.profanity_wordlist):
            with open(self.profanity_wordlist, 'r') as handle:
                words = []
                for line in handle:
                    words.append(line.strip(' \n\r'))
                if words:
                    self.profanities = re.compile(r"\b(%s)\b" % '|'.join(words),
                                                  re.I | re.U)

        self.reddit_host = socket.gethostname()
        self.reddit_pid  = os.getpid()

        #the shutdown toggle
        self.shutdown = False

        #if we're going to use the query_queue, we need amqp
        if self.write_query_queue and not self.amqp_host:
            raise Exception("amqp_host must be defined to use the query queue")

        # This requirement doesn't *have* to be a requirement, but there are
        # bugs at the moment that will pop up if you violate it
        if self.write_query_queue and not self.use_query_cache:
            raise Exception("write_query_queue requires use_query_cache")

        # try to set the source control revision number
        try:
            popen = subprocess.Popen(["git", "log", "--date=short",
                                      "--pretty=format:%H %h", '-n1'],
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE)
            resp, stderrdata = popen.communicate()
            resp = resp.strip().split(' ')
            self.version, self.short_version = resp
        except Exception, e:
            self.log.info("Couldn't read source revision (%r)" % e)
            self.version = self.short_version = '(unknown)'
Example 17
import struct
import time

from odict import OrderedDict

import pycassa

from cassandra.ttypes import NotFoundException

__all__ = ['get_user_by_id', 'get_user_by_username', 'get_friend_ids',
    'get_follower_ids', 'get_users_for_user_ids', 'get_friends',
    'get_followers', 'get_timeline', 'get_userline', 'get_tweet', 'save_user',
    'save_tweet', 'add_friends', 'remove_friends', 'DatabaseError',
    'NotFound', 'InvalidDictionary', 'PUBLIC_USERLINE_KEY']

CLIENT = pycassa.connect_thread_local(framed_transport=True)

USER = pycassa.ColumnFamily(CLIENT, 'Twissandra', 'User',
    dict_class=OrderedDict)
USERNAME = pycassa.ColumnFamily(CLIENT, 'Twissandra', 'Username',
    dict_class=OrderedDict)
FRIENDS = pycassa.ColumnFamily(CLIENT, 'Twissandra', 'Friends',
    dict_class=OrderedDict)
FOLLOWERS = pycassa.ColumnFamily(CLIENT, 'Twissandra', 'Followers',
    dict_class=OrderedDict)
TWEET = pycassa.ColumnFamily(CLIENT, 'Twissandra', 'Tweet',
    dict_class=OrderedDict)
TIMELINE = pycassa.ColumnFamily(CLIENT, 'Twissandra', 'Timeline',
    dict_class=OrderedDict)
USERLINE = pycassa.ColumnFamily(CLIENT, 'Twissandra', 'Userline',
    dict_class=OrderedDict)
Example 18
import time

from ordereddict import OrderedDict

import pycassa

from pycassa.cassandra.ttypes import NotFoundException

__all__ = ['get_user_by_username', 'get_friend_usernames',
    'get_follower_usernames', 'get_users_for_usernames', 'get_friends',
    'get_followers', 'get_timeline', 'get_userline', 'get_tweet', 'save_user',
    'save_tweet', 'add_friends', 'remove_friends', 'DatabaseError',
    'NotFound', 'InvalidDictionary', 'PUBLIC_USERLINE_KEY']

CLIENT = pycassa.connect_thread_local('Twissandra')

USER = pycassa.ColumnFamily(CLIENT, 'User', dict_class=OrderedDict)
FRIENDS = pycassa.ColumnFamily(CLIENT, 'Friends', dict_class=OrderedDict)
FOLLOWERS = pycassa.ColumnFamily(CLIENT, 'Followers', dict_class=OrderedDict)
TWEET = pycassa.ColumnFamily(CLIENT, 'Tweet', dict_class=OrderedDict)
TIMELINE = pycassa.ColumnFamily(CLIENT, 'Timeline', dict_class=OrderedDict)
USERLINE = pycassa.ColumnFamily(CLIENT, 'Userline', dict_class=OrderedDict)

# NOTE: Having a single userline key to store all of the public tweets is not
#       scalable.  Currently, Cassandra requires that an entire row (meaning
#       every column under a given key) be able to fit in memory.  You can
#       imagine that after a while, the entire public timeline would exceed
#       available memory.
#
#       The fix for this is to partition the timeline by time, so we could use
#       a key like !PUBLIC!2010-04-01 to partition it per day.  We could drill
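
A minimal sketch of the day-partitioning fix described in the note above; the '!PUBLIC!YYYY-MM-DD' key format is taken from the comment, and the helper name is an assumption:

import datetime

def daily_userline_key(when=None):
    # One public-userline row per day (e.g. '!PUBLIC!2010-04-01') keeps
    # any single row from growing without bound.
    when = when or datetime.datetime.utcnow()
    return '!PUBLIC!%s' % when.strftime('%Y-%m-%d')

# Writes go to today's partition; a reader walks back one key per day
# until it has collected enough tweets, e.g.:
#   USERLINE.get(daily_userline_key(), column_count=50)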
Example 19
    def __init__(self, global_conf, app_conf, paths, **extra):
        """
        Globals acts as a container for objects available throughout
        the life of the application.

        One instance of Globals is created by Pylons during
        application initialization and is available during requests
        via the 'g' variable.

        ``global_conf``
            The same variable used throughout ``config/middleware.py``,
            namely the variables from the ``[DEFAULT]`` section of the
            configuration file.

        ``app_conf``
            The same ``kw`` dictionary used throughout
            ``config/middleware.py``, namely the variables from the
            section in the config file for your application.

        ``extra``
            The configuration returned from ``load_config`` in
            ``config/middleware.py``, which may be of use in the setup of
            your global variables.

        """

        # slop over all variables to start with
        for k, v in global_conf.iteritems():
            if not k.startswith("_") and not hasattr(self, k):
                if k in self.int_props:
                    v = int(v)
                elif k in self.float_props:
                    v = float(v)
                elif k in self.bool_props:
                    v = self.to_bool(v)
                elif k in self.tuple_props:
                    v = tuple(self.to_iter(v))
                elif k in self.choice_props:
                    if v not in self.choice_props[k]:
                        raise ValueError(
                            "Unknown option for %r: %r not in %r" %
                            (k, v, self.choice_props[k]))
                    v = self.choice_props[k][v]
                setattr(self, k, v)

        self.running_as_script = global_conf.get('running_as_script', False)

        # initialize caches. Any cache-chains built here must be added
        # to cache_chains (closed around by reset_caches) so that they
        # can properly reset their local components

        localcache_cls = (SelfEmptyingCache
                          if self.running_as_script else LocalCache)
        num_mc_clients = self.num_mc_clients

        self.cache_chains = []

        self.memcache = CMemcache(self.memcaches, num_clients=num_mc_clients)
        self.make_lock = make_lock_factory(self.memcache)

        if not self.cassandra_seeds:
            raise ValueError("cassandra_seeds not set in the .ini")
        if not self.url_seeds:
            raise ValueError("url_seeds not set in the .ini")
        self.cassandra_seeds = list(self.cassandra_seeds)
        random.shuffle(self.cassandra_seeds)
        self.cassandra = pycassa.connect_thread_local(self.cassandra_seeds)
        perma_memcache = (CMemcache(self.permacache_memcaches,
                                    num_clients=num_mc_clients)
                          if self.permacache_memcaches else None)
        self.permacache = self.init_cass_cache(
            'permacache',
            'permacache',
            self.cassandra,
            self.make_lock,
            memcache=perma_memcache,
            read_consistency_level=self.cassandra_rcl,
            write_consistency_level=self.cassandra_wcl,
            localcache_cls=localcache_cls)
        self.cache_chains.append(self.permacache)

        self.url_seeds = list(self.url_seeds)
        random.shuffle(self.url_seeds)
        self.url_cassandra = pycassa.connect_thread_local(self.url_seeds)
        self.urlcache = self.init_cass_cache(
            'urls',
            'urls',
            self.url_cassandra,
            self.make_lock,
            # until we've merged this
            # with the regular
            # cluster, this will
            # always be CL_ONE
            read_consistency_level=CL_ONE,
            write_consistency_level=CL_ONE,
            localcache_cls=localcache_cls)
        self.cache_chains.append(self.urlcache)

        # hardcache is done after the db info is loaded, and then the
        # chains are reset to use the appropriate initial entries

        self.cache = MemcacheChain((localcache_cls(), self.memcache))
        self.cache_chains.append(self.cache)

        self.rendercache = MemcacheChain(
            (localcache_cls(),
             CMemcache(self.rendercaches,
                       noreply=True,
                       no_block=True,
                       num_clients=num_mc_clients)))
        self.cache_chains.append(self.rendercache)

        self.servicecache = MemcacheChain(
            (localcache_cls(),
             CMemcache(self.servicecaches, num_clients=num_mc_clients)))
        self.cache_chains.append(self.servicecache)

        self.thing_cache = CacheChain((localcache_cls(), ))
        self.cache_chains.append(self.thing_cache)

        # set default time zone if one is not set
        tz = global_conf.get('timezone')
        dtz = global_conf.get('display_timezone', tz)

        self.tz = pytz.timezone(tz)
        self.display_tz = pytz.timezone(dtz)

        #load the database info
        self.dbm = self.load_db_params(global_conf)

        # can't do this until load_db_params() has been called
        self.hardcache = HardcacheChain(
            (localcache_cls(), self.memcache, HardCache(self)),
            cache_negative_results=True)
        self.cache_chains.append(self.hardcache)

        # I know this sucks, but we need non-request-threads to be
        # able to reset the caches, so we need them to be able to close
        # around 'cache_chains' without being able to call getattr on
        # 'g'
        cache_chains = self.cache_chains[::]

        def reset_caches():
            for chain in cache_chains:
                chain.reset()

        self.reset_caches = reset_caches
        self.reset_caches()

        #make a query cache
        self.stats_collector = QueryStats()

        # set the modwindow
        self.MODWINDOW = timedelta(self.MODWINDOW)

        self.REDDIT_MAIN = bool(os.environ.get('REDDIT_MAIN'))

        # turn on for language support
        self.languages, self.lang_name = \
                        get_active_langs(default_lang=self.lang)

        all_languages = self.lang_name.keys()
        all_languages.sort()
        self.all_languages = all_languages

        self.paths = paths

        # load the md5 hashes of files under static
        static_files = os.path.join(paths.get('static_files'), 'static')
        self.static_md5 = {}
        if os.path.exists(static_files):
            for f in os.listdir(static_files):
                if f.endswith('.md5'):
                    key = f[0:-4]
                    f = os.path.join(static_files, f)
                    with open(f, 'r') as handle:
                        md5 = handle.read().strip('\n')
                    self.static_md5[key] = md5

        #set up the logging directory
        log_path = self.log_path
        process_iden = global_conf.get('scgi_port', 'default')
        self.reddit_port = process_iden
        if log_path:
            if not os.path.exists(log_path):
                os.makedirs(log_path)
            for fname in os.listdir(log_path):
                if fname.startswith(process_iden):
                    full_name = os.path.join(log_path, fname)
                    os.remove(full_name)

        #setup the logger
        self.log = logging.getLogger('reddit')
        self.log.addHandler(logging.StreamHandler())
        if self.debug:
            self.log.setLevel(logging.DEBUG)
        else:
            self.log.setLevel(logging.INFO)

        # set log level for pycountry which is chatty
        logging.getLogger('pycountry.db').setLevel(logging.CRITICAL)

        if not self.media_domain:
            self.media_domain = self.domain
        if self.media_domain == self.domain:
            print("Warning: g.media_domain == g.domain. " +
                  "This may give untrusted content access to user cookies")

        #read in our CSS so that it can become a default for subreddit
        #stylesheets
        stylesheet_path = os.path.join(paths.get('static_files'),
                                       self.static_path.lstrip('/'),
                                       self.stylesheet)
        with open(stylesheet_path) as s:
            self.default_stylesheet = s.read()

        self.profanities = None
        if self.profanity_wordlist and os.path.exists(self.profanity_wordlist):
            with open(self.profanity_wordlist, 'r') as handle:
                words = []
                for line in handle:
                    words.append(line.strip(' \n\r'))
                if words:
                    self.profanities = re.compile(
                        r"\b(%s)\b" % '|'.join(words), re.I | re.U)

        self.reddit_host = socket.gethostname()
        self.reddit_pid = os.getpid()

        #the shutdown toggle
        self.shutdown = False

        #if we're going to use the query_queue, we need amqp
        if self.write_query_queue and not self.amqp_host:
            raise Exception("amqp_host must be defined to use the query queue")

        # This requirement doesn't *have* to be a requirement, but there are
        # bugs at the moment that will pop up if you violate it
        if self.write_query_queue and not self.use_query_cache:
            raise Exception("write_query_queue requires use_query_cache")

        # try to set the source control revision number
        try:
            popen = subprocess.Popen(
                ["git", "log", "--date=short", "--pretty=format:%H %h", '-n1'],
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE)
            resp, stderrdata = popen.communicate()
            resp = resp.strip().split(' ')
            self.version, self.short_version = resp
        except Exception, e:
            self.log.info("Couldn't read source revision (%r)" % e)
            self.version = self.short_version = '(unknown)'