Example #1
0
def fancy_stats_deferred(exid, start_dt, end_dt, cursor, uid, i):
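    """Summarize correct ProblemLogs for one exercise in batches of 1000.

    Each batch is reduced into an ExerciseStatisticShard keyed by exercise,
    date range, and cursor, and the task re-enqueues itself with the next
    cursor. When no logs remain, the shards are combined into a single
    ExerciseStatistic.
    """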
    key_name = ExerciseStatisticShard.make_key(exid, start_dt, end_dt, cursor)
    if cursor and ExerciseStatisticShard.get_by_key_name(key_name):
        # We've already run, die.
        return

    query = ProblemLog.all()
    query.filter('exercise =', exid)
    query.filter('correct =', True)
    query.filter('time_done >=', start_dt)
    query.filter('time_done <', end_dt)
    query.order('-time_done')

    if cursor:
        query.with_cursor(cursor)

    problem_logs = query.fetch(1000)
    if len(problem_logs) > 0:
        logging.info("processing %d logs for %s" % (len(problem_logs), exid))

        stats = fancy_stats_from_logs(problem_logs)
        pickled = pickle_util.dump(stats)

        shard = ExerciseStatisticShard(
            key_name=key_name,
            exid=exid,
            start_dt=start_dt,
            end_dt=end_dt,
            cursor=cursor,
            blob_val=pickled)
        shard.put()

        enqueue_task(exid, start_dt, end_dt, query.cursor(), uid, i + 1)
    else:
        # No more problem logs left to process
        logging.info("Summing all stats for %s", exid)

        all_stats = fancy_stats_shard_reducer(exid, start_dt, end_dt)

        model = ExerciseStatistic(
            key_name=ExerciseStatistic.make_key(exid, start_dt, end_dt),
            exid=exid,
            start_dt=start_dt,
            end_dt=end_dt,
            blob_val=pickle_util.dump(all_stats),
            log_count=all_stats['log_count'])
        model.put()

        logging.info("done processing %d logs for %s",
                     all_stats['log_count'], exid)
Example #2
0
    def set_multi(mapping, 
                  time=DEFAULT_LAYER_CACHE_EXPIRATION_SECONDS, 
                  namespace=""):                        
        ''' Sets multiple KeyValueCache entries at once. It mirrors the
        parameters of memcache.set_multi. Note: set_multi is not atomic.
        '''
        
        namespaced_mapping = dict(
            (KeyValueCache.get_namespaced_key(key, namespace), value) 
            for key, value in mapping.iteritems())

        dt = datetime.datetime.now()

        dt_expires = datetime.datetime.max
        if time > 0:
            dt_expires = dt + datetime.timedelta(seconds=time)

        key_values = []
        
        for namespaced_key, value in namespaced_mapping.iteritems():    
            
            # check to see if we need to pickle the value
            pickled = False
            if not isinstance(value, str):
                pickled = True
                value = pickle_util.dump(value)

            key_values.append(KeyValueCache(
                    key_name = namespaced_key,
                    value = value,
                    created = dt,
                    expires = dt_expires,
                    pickled = pickled))
        
        db.put(key_values)
Example #3
0
    def test_new_class_to_old_class(self):
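        """A new-style instance should still unpickle after its class is
        redefined as old-style."""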
        class NewOldClass(object):
            """New-style class."""
            def __init__(self, x, y):
                self.x = x
                self.y = y
        # A trick so we can pickle this class even though it's nested.
        setattr(sys.modules[__name__], 'NewOldClass', NewOldClass)
        pickled_new = pickle_util.dump(NewOldClass(5, 11))

        # Redefine NewOldClass to be old-style
        del NewOldClass

        class NewOldClass:
            """Old-style class."""
            def __init__(self, x, y):
                self.x = x
                self.y = y
        setattr(sys.modules[__name__], 'NewOldClass', NewOldClass)

        # Make sure the unpickling uses pickle, not cPickle
        old_cpickle = pickle_util.cPickle
        old_unpickler_class = pickle_util.g_unpickler_class
        try:
            pickle_util.cPickle = pickle_util.pickle    
            pickle_util.g_unpickler_class = (
                pickle_util.RenamedClassUnpicklerForPicklePy)

            foo = pickle_util.load(pickled_new)
            self.assertEqual(5, foo.x)
            self.assertEqual(11, foo.y)
            self.assertEqual('Old-style class.', foo.__doc__)
        finally:
            pickle_util.cPickle = old_cpickle
            pickle_util.g_unpickler_class = old_unpickler_class
Example #4
0
def _set_css_deferred(user_data_key, video_key, status, version):
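    """Update a user's pickled sets of started/completed video CSS ids.

    Runs as a deferred task; the version check below guards against tasks
    applying out of order.
    """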
    user_data = user_models.UserData.get(user_data_key)
    uvc = UserVideoCss.get_for_user_data(user_data)
    css = pickle_util.load(uvc.pickled_dict)

    id = '.v%d' % video_key.id()
    if status == UserVideoCss.STARTED:
        if id in css['completed']:
            logging.warn("video [%s] for [%s] went from completed->started. ignoring." %
                         (video_key, user_data_key))
        else:
            css['started'].add(id)
    else:
        css['started'].discard(id)
        css['completed'].add(id)

    uvc.pickled_dict = pickle_util.dump(css)
    uvc.load_pickled()

    # If _set_css_deferred tasks run out of order, bump the version number
    # to break the cache.
    if version < uvc.version:
        version = uvc.version + 1
        user_data.uservideocss_version += 1
        db.put(user_data)

    uvc.version = version
    db.put(uvc)
Example #5
0
 def get_for_user_data(user_data):
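     """Get (or lazily create) the UserVideoCss entity for user_data.

     New entities start with empty pickled 'started'/'completed' sets.
     """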
     p = pickle_util.dump({'started': set([]), 'completed': set([])})
     return UserVideoCss.get_or_insert(UserVideoCss._key_for(user_data),
                                       user=user_data.user,
                                       video_css='',
                                       pickled_dict=p,
                                       )
Example #6
0
    def set_multi(mapping,
                  time=DEFAULT_LAYER_CACHE_EXPIRATION_SECONDS,
                  namespace=""):
        ''' Sets multiple KeyValueCache entries at once. It mirrors the
        parameters of memcache.set_multi. Note: set_multi is not atomic.
        '''

        namespaced_mapping = dict(
            (KeyValueCache.get_namespaced_key(key, namespace), value)
            for key, value in mapping.iteritems())

        dt = datetime.datetime.now()

        dt_expires = datetime.datetime.max
        if time > 0:
            dt_expires = dt + datetime.timedelta(seconds=time)

        key_values = []

        for namespaced_key, value in namespaced_mapping.iteritems():

            # check to see if we need to pickle the value
            pickled = False
            if not isinstance(value, str):
                pickled = True
                value = pickle_util.dump(value)

            key_values.append(
                KeyValueCache(key_name=namespaced_key,
                              value=value,
                              created=dt,
                              expires=dt_expires,
                              pickled=pickled))

        db.put(key_values)
Example #7
0
 def save(experiment, notes, emotions):
     """Save notes and emo list, associating with specified experiment."""
     notes = _GAEBingoExperimentNotes(
         key_name = _GAEBingoExperimentNotes.key_for_experiment(experiment),
         parent = experiment,
         notes = notes,
         pickled_emotions = pickle_util.dump(emotions))
     notes.put()
Example #8
0
 def test_rewritten_class_instance(self):
     global OldClass
     # Mock out the rename-map.
     pickle_util._CLASS_RENAME_MAP = {
         ('pickle_util_test', 'OldClass'):
         ('mod.submod1.submod2', 'NewClass')
     }
     pickled = pickle_util.dump(OldClass())
     # Just to make this more fun, delete OldClass
     del OldClass
     actual = pickle_util.load(pickled)
     import mod.submod1.submod2
     self.assertTrue(isinstance(actual, mod.submod1.submod2.NewClass))
Example #9
0
 def test_rewritten_class_instance(self):
     global OldClass
     # Mock out the rename-map.
     pickle_util._CLASS_RENAME_MAP = {
         ('pickle_util_test', 'OldClass'):
         ('mod.submod1.submod2', 'NewClass')
         }
     pickled = pickle_util.dump(OldClass())
     # Just to make this more fun, delete OldClass
     del OldClass
     actual = pickle_util.load(pickled)
     import mod.submod1.submod2
     self.assertTrue(isinstance(actual, mod.submod1.submod2.NewClass))
Example #10
0
def persist_gae_bingo_identity_records(list_identities):
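    """Copy each identity's memcached BingoIdentityCache into the datastore.

    Identities with no entry in memcache are skipped.
    """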

    dict_identity_caches = memcache.get_multi([BingoIdentityCache.key_for_identity(ident) for ident in list_identities])

    for ident in list_identities:
        identity_cache = dict_identity_caches.get(BingoIdentityCache.key_for_identity(ident))

        if identity_cache:
            bingo_identity = _GAEBingoIdentityRecord(
                        key_name = _GAEBingoIdentityRecord.key_for_identity(ident),
                        identity = ident,
                        pickled = pickle_util.dump(identity_cache),
                    )
            bingo_identity.put()
Example #11
0
 def setUp(self):
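     """Set up a taskqueue stub and import testutil/topictree.json into a
     fresh edit topic version."""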
     super(LayoutTest, self).setUp(db_consistency_probability=1)
     self.testbed = testbed.Testbed()
     self.testbed.activate()
     self.testbed.init_taskqueue_stub()
     self.taskqueue_stub = self.testbed.get_stub(testbed.TASKQUEUE_SERVICE_NAME)
     json_data = open('testutil/topictree.json')
     data = json.load(json_data)
     version = topic_models.TopicVersion.create_new_version()
     version.default = True
     version.put()
     version = topic_models.TopicVersion.create_edit_version()
     v1_utils.topictree_import_task("edit", "root",
                                    False,
                                    zlib.compress(pickle_util.dump(data)))
Example #12
0
    def set(key,
            value,
            time=None,
            namespace="",
            cache_class=memcache,
            compress=True):
        ''' Pickles (and optionally compresses) value, then breaks it into
        1MB chunks and stores them with set_multi on whatever class
        cache_class is set to (memcache or KeyValueCache).
        '''

        result = pickle_util.dump(value)
        if compress:
            result = zlib.compress(result)

        size = len(result)
        if size > MAX_SIZE:
            logging.warning("Not caching %s: %i is greater than maxsize %i" %
                            (key, size, MAX_SIZE))
            return

        # If the (possibly compressed) item now fits in a single 1MB object,
        # skip the chunk_list entirely; that saves an extra round-trip on gets.
        if size < MAX_SIZE_OF_CACHE_CHUNKS:
            return cache_class.set(key,
                                   ChunkedResult(data=result,
                                                 compress=compress),
                                   time=time,
                                   namespace=namespace)

        mapping = {}
        chunk_list = []
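        # A random generation token gets prepended to every chunk so a later
        # get can verify the chunks all came from this set_multi call.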
        generation = os.urandom(CHUNK_GENERATION_LENGTH)
        for i, pos in enumerate(range(0, size, MAX_SIZE_OF_CACHE_CHUNKS)):
            chunk = generation + result[pos:pos + MAX_SIZE_OF_CACHE_CHUNKS]
            chunk_key = key + "__chunk%i__" % i
            mapping[chunk_key] = chunk
            chunk_list.append(chunk_key)

        mapping[key] = ChunkedResult(chunk_list=chunk_list,
                                     generation=generation,
                                     compress=compress)

        # Note: set_multi is not atomic, so on get we must verify that all the
        # keys are present and came from the same set_multi operation.
        return cache_class.set_multi(mapping, time=time, namespace=namespace)
Example #13
0
def create_experiment_and_alternatives(
    experiment_name,
    canonical_name,
    alternative_params=None,
    conversion_name=None,
    conversion_type=ConversionTypes.Binary,
    family_name=None,
):
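    """Build a _GAEBingoExperiment and its _GAEBingoAlternative children.

    Nothing is saved here; the entities are returned for the caller to put().
    """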

    if not experiment_name:
        raise Exception("gae_bingo experiments must be named.")

    conversion_name = conversion_name or experiment_name

    if not alternative_params:
        # Default to simple True/False testing
        alternative_params = [True, False]

    experiment = _GAEBingoExperiment(
        key_name=_GAEBingoExperiment.key_for_name(experiment_name),
        name=experiment_name,
        canonical_name=canonical_name,
        family_name=family_name,
        conversion_name=conversion_name,
        conversion_type=conversion_type,
        live=True,
    )

    alternatives = []

    is_dict = type(alternative_params) == dict
    for i, content in enumerate(alternative_params):

        alternatives.append(
            _GAEBingoAlternative(
                key_name=_GAEBingoAlternative.key_for_experiment_name_and_number(experiment_name, i),
                parent=experiment,
                experiment_name=experiment.name,
                number=i,
                pickled_content=pickle_util.dump(content),
                live=True,
                weight=alternative_params[content] if is_dict else 1,
            )
        )

    return experiment, alternatives
Example #14
0
def persist_gae_bingo_identity_records(list_identities):

    dict_identity_caches = memcache.get_multi([
        BingoIdentityCache.key_for_identity(ident) for ident in list_identities
    ])

    for ident in list_identities:
        identity_cache = dict_identity_caches.get(
            BingoIdentityCache.key_for_identity(ident))

        if identity_cache:
            bingo_identity = _GAEBingoIdentityRecord(
                key_name=_GAEBingoIdentityRecord.key_for_identity(ident),
                identity=ident,
                pickled=pickle_util.dump(identity_cache),
            )
            bingo_identity.put()
Example #15
0
    def topic_update_from_live(self, edit_version):
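        """Fetch the live topic tree and defer importing it into the edit
        version."""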
        layout.update_from_live(edit_version)
        try:
            response = urlfetch.fetch(
                url="http://www.khanacademy.org/api/v1/topictree",
                deadline=25)
            topictree = json.loads(response.content)

            logging.info("calling /_ah/queue/deferred_import")

            # The full topic tree can be too large to pass as a task argument
            # directly, so pickle and compress it before deferring the import.
            deferred.defer(api.v1_utils.topictree_import_task, "edit", "root",
                           True,
                           zlib.compress(pickle_util.dump(topictree)),
                           _queue="import-queue",
                           _url="/_ah/queue/deferred_import")

        except urlfetch.Error, e:
            logging.exception("Failed to fetch content from khanacademy.org")
Example #16
0
    def set(key, value, time=None, namespace="", cache_class=memcache, 
            compress=True):
        ''' Pickles (and optionally compresses) value, then breaks it into
        1MB chunks and stores them with set_multi on whatever class
        cache_class is set to (memcache or KeyValueCache).
        '''

        result = pickle_util.dump(value)
        if compress:
            result = zlib.compress(result)
        
        size = len(result)
        if size > MAX_SIZE:
            logging.warning("Not caching %s: %i is greater than maxsize %i" % 
                            (key, size, MAX_SIZE))
            return
            
        # If the (possibly compressed) item now fits in a single 1MB object,
        # skip the chunk_list entirely; that saves an extra round-trip on gets.
        if size < MAX_SIZE_OF_CACHE_CHUNKS:
            return cache_class.set(key, 
                                   ChunkedResult(data=result, 
                                                 compress=compress),
                                   time=time,
                                   namespace=namespace)              
                                    
        mapping = {}
        chunk_list = []
        generation = os.urandom(CHUNK_GENERATION_LENGTH) 
        for i, pos in enumerate(range(0, size, MAX_SIZE_OF_CACHE_CHUNKS)):
            chunk = generation + result[pos : pos + MAX_SIZE_OF_CACHE_CHUNKS]                           
            chunk_key = key + "__chunk%i__" % i
            mapping[chunk_key] = chunk
            chunk_list.append(chunk_key)

        mapping[key] = ChunkedResult(chunk_list=chunk_list,
                                     generation=generation,
                                     compress=compress)

        # Note: set_multi is not atomic, so on get we must verify that all the
        # keys are present and came from the same set_multi operation.
        return cache_class.set_multi(mapping, time=time, namespace=namespace)
Example #17
0
def create_experiment_and_alternatives(experiment_name,
                                       canonical_name,
                                       alternative_params=None,
                                       conversion_name=None,
                                       conversion_type=ConversionTypes.Binary,
                                       family_name=None):

    if not experiment_name:
        raise Exception("gae_bingo experiments must be named.")

    conversion_name = conversion_name or experiment_name

    if not alternative_params:
        # Default to simple True/False testing
        alternative_params = [True, False]

    experiment = _GAEBingoExperiment(
        key_name=_GAEBingoExperiment.key_for_name(experiment_name),
        name=experiment_name,
        canonical_name=canonical_name,
        family_name=family_name,
        conversion_name=conversion_name,
        conversion_type=conversion_type,
        live=True,
    )

    alternatives = []

    is_dict = type(alternative_params) == dict
    for i, content in enumerate(alternative_params):

        alternatives.append(
            _GAEBingoAlternative(
                key_name=_GAEBingoAlternative.
                key_for_experiment_name_and_number(experiment_name, i),
                parent=experiment,
                experiment_name=experiment.name,
                number=i,
                pickled_content=pickle_util.dump(content),
                live=True,
                weight=alternative_params[content] if is_dict else 1,
            ))

    return experiment, alternatives
Example #18
0
def create_experiment_and_alternatives(experiment_name, canonical_name,
                                       alternative_params = None,
                                       conversion_name = None,
                                       conversion_type = ConversionTypes.Binary,
                                       family_name = None):

    if not experiment_name:
        raise Exception("gae_bingo experiments must be named.")

    conversion_name = conversion_name or experiment_name

    if not alternative_params:
        # Default to simple True/False testing
        alternative_params = [True, False]

    # Generate a random key name for this experiment so it doesn't collide with
    # any past experiments of the same name. All other entities, such as
    # alternatives, snapshots, and notes, will then use this entity as their
    # parent.
    experiment = _GAEBingoExperiment(
                key_name = "%s:%s" % (
                    experiment_name, os.urandom(8).encode("hex")),
                name = experiment_name,
                canonical_name = canonical_name,
                family_name = family_name,
                conversion_name = conversion_name,
                conversion_type = conversion_type,
                live = True,
            )

    alternatives = []

    is_dict = type(alternative_params) == dict
    for i, content in enumerate(alternative_params):

        alternatives.append(
                _GAEBingoAlternative(
                        key_name = _GAEBingoAlternative.key_for_experiment_name_and_number(experiment_name, i),
                        parent = experiment,
                        experiment_name = experiment.name,
                        number = i,
                        pickled_content = pickle_util.dump(content),
                        live = True,
                        weight = alternative_params[content] if is_dict else 1,
                    )
                )

    return experiment, alternatives
Example #19
0
    def topic_update_from_live(self, edit_version):
        layout.update_from_live(edit_version)
        try:
            if App.is_dev_server:
                topictree_url="http://khan-testing.appspot.com/topictree_test.json"
            else:
                topictree_url=url_util.absolute_url("/topictree.json")
            response = urlfetch.fetch(
                url=topictree_url,
                deadline=25)
            topictree = json.loads(response.content)

            logging.info("calling /_ah/queue/deferred_import")

            # The full topic tree can be too large to pass as a task argument
            # directly, so pickle and compress it before deferring the import.
            deferred.defer(api.v1_utils.topictree_import_task, "edit", "root",
                           True,
                           zlib.compress(pickle_util.dump(topictree)),
                           _queue="import-queue",
                           _url="/_ah/queue/deferred_import")

        except urlfetch.Error, e:
            logging.exception("Failed to fetch content from khanacademy.org")
Example #20
0
    def test_new_class_to_old_class(self):
        class NewOldClass(object):
            """New-style class."""
            def __init__(self, x, y):
                self.x = x
                self.y = y

        # A trick so we can pickle this class even though it's nested.
        setattr(sys.modules[__name__], 'NewOldClass', NewOldClass)
        pickled_new = pickle_util.dump(NewOldClass(5, 11))

        # Redefine NewOldClass to be old-style
        del NewOldClass

        class NewOldClass:
            """Old-style class."""
            def __init__(self, x, y):
                self.x = x
                self.y = y

        setattr(sys.modules[__name__], 'NewOldClass', NewOldClass)

        # Make sure the unpickling uses pickle, not cPickle
        old_cpickle = pickle_util.cPickle
        old_unpickler_class = pickle_util.g_unpickler_class
        try:
            pickle_util.cPickle = pickle_util.pickle
            pickle_util.g_unpickler_class = (
                pickle_util.RenamedClassUnpicklerForPicklePy)

            foo = pickle_util.load(pickled_new)
            self.assertEqual(5, foo.x)
            self.assertEqual(11, foo.y)
            self.assertEqual('Old-style class.', foo.__doc__)
        finally:
            pickle_util.cPickle = old_cpickle
            pickle_util.g_unpickler_class = old_unpickler_class
Example #21
0
 def compress(value):
     """Compress value so it'll fit in a single memcache value."""
     pickled = pickle_util.dump(value)
     return zlib.compress(pickled)
Example #22
0
 def compress(value):
     """Compress value so it'll fit in a single memcache value."""
     pickled = pickle_util.dump(value)
     return zlib.compress(pickled)
Example #23
0
 def test_simple_class(self):
     """Test pickling and unpickling a class and class instance."""
     expected = (OldClass, OldClass())
     actual = pickle_util.load(pickle_util.dump(expected))
     self.assertEqual(expected[0], actual[0])
     self.assertEqual(type(expected[1]), type(actual[1]))
Example #24
0
 def test_simple(self):
     expected = 'i am a simple type'
     actual = pickle_util.load(pickle_util.dump(expected))
     self.assertEqual(expected, actual)
Example #25
0
 def validate(self, value):
     """Validate that value is pickle-able (raise an exception if not)."""
     pickled_value = pickle_util.dump(value)
     _ = super(ObjectProperty, self).validate(pickled_value)
     return value
Example #26
0
 def test_using_cpickle_to_unpickle(self):
     expected = 'This is a test string'
     actual = cPickle.loads(pickle_util.dump(expected))
     self.assertEqual(expected, actual)
Example #27
0
 def pickled(*args, **kwargs):
     return pickle_util.dump(func(*args, **kwargs))
Example #28
0
 def set_short_circuit_content(self, value):
     self.short_circuit_pickled_content = pickle_util.dump(value)
Example #29
0
 def set_short_circuit_content(self, value):
     self.short_circuit_pickled_content = pickle_util.dump(value)
Example #30
0
 def test_simple_class(self):
     """Test pickling and unpickling a class and class instance."""
     expected = (OldClass, OldClass())
     actual = pickle_util.load(pickle_util.dump(expected))
     self.assertEqual(expected[0], actual[0])
     self.assertEqual(type(expected[1]), type(actual[1]))
Example #31
0
 def test_simple(self):
     expected = 'i am a simple type'
     actual = pickle_util.load(pickle_util.dump(expected))
     self.assertEqual(expected, actual)
Example #32
0
    def store(self):
        # Store compressed results so we stay under the memcache 1MB limit
        pickled = pickle_util.dump(self)
        compressed_pickled = zlib.compress(pickled)

        return memcache.set(RequestStats.memcache_key(self.request_id),
                            compressed_pickled)
Example #33
0
 def test_using_cpickle_to_unpickle(self):
     expected = 'This is a test string'
     actual = cPickle.loads(pickle_util.dump(expected))
     self.assertEqual(expected, actual)
Example #34
0
 def pickled(*args, **kwargs):
     return pickle_util.dump(func(*args, **kwargs))
Example #35
0
 def get_value_for_datastore(self, model_instance):
     result = (super(ObjectProperty, self)
               .get_value_for_datastore(model_instance))
     result = pickle_util.dump(result)
     return db.Blob(result)