Example #1
0
def join_authors():
    """Run a reducer that emits the authored things of matching accounts.

    The inner ``process`` generator is handed to ``mr_tools.mr_reduce`` and
    receives one ``thing_id`` together with all of its values.  Values that
    contain ``'comment'`` or ``'link'`` are unpacked with
    ``mr_tools.format_dataspec`` and remembered; a value that contains
    ``'account'`` is unpacked as well and marks the group as matching.

    If at least one account value produced a non-``None`` result, one tuple
    ``(tid, data_type, thing_type, key, thing_id)`` is yielded for every
    remembered comment/link value.  Otherwise nothing is yielded.

    NOTE(review): the flag is named ``gold``, so presumably the account value
    is the account's gold attribute -- confirm against the mapper that
    produces these rows.
    """
    def process(thing_id, vals):
        authors = []
        gold = None

        for val in vals:
            if ('comment' in val) or ("link" in val):
                authors.append(mr_tools.format_dataspec(val,
                                      ['data_type', # e.g. 'data'
                                       'thing_type', # e.g. 'link'
                                       'key', # e.g. 'sr_id'
                                       'tid'
                                       ]))
            elif 'account' in val:
                # Only whether this is set matters below; the fields of the
                # formatted account value are never read.
                gold = mr_tools.format_dataspec(val,
                                      ['data_type', # e.g. 'data'
                                       'thing_type', # e.g. 'link'
                                       'key', # e.g. 'sr_id'
                                       'value'])

        # Emit only after every value has been seen, because the account
        # value may arrive after the comment/link values.
        if gold is not None:
            for author in authors:
                yield (author.tid, author.data_type, author.thing_type,
                       author.key, thing_id)

    mr_tools.mr_reduce(process)
Example #2
0
def join_authors():
    """Run a reducer that emits the authored things of matching accounts.

    The inner ``process`` generator is handed to ``mr_tools.mr_reduce`` and
    receives one ``thing_id`` together with all of its values.  Values that
    contain ``'comment'`` or ``'link'`` are unpacked with
    ``mr_tools.format_dataspec`` and remembered; a value that contains
    ``'account'`` is unpacked as well and marks the group as matching.

    If at least one account value produced a non-``None`` result, one tuple
    ``(tid, data_type, thing_type, key, thing_id)`` is yielded for every
    remembered comment/link value.  Otherwise nothing is yielded.

    NOTE(review): the flag is named ``gold``, so presumably the account value
    is the account's gold attribute -- confirm against the mapper that
    produces these rows.
    """
    def process(thing_id, vals):
        authors = []
        gold = None

        for val in vals:
            if ('comment' in val) or ("link" in val):
                authors.append(
                    mr_tools.format_dataspec(
                        val,
                        [
                            'data_type',  # e.g. 'data'
                            'thing_type',  # e.g. 'link'
                            'key',  # e.g. 'sr_id'
                            'tid'
                        ]))
            elif 'account' in val:
                # Only whether this is set matters below; the fields of the
                # formatted account value are never read.
                gold = mr_tools.format_dataspec(
                    val,
                    [
                        'data_type',  # e.g. 'data'
                        'thing_type',  # e.g. 'link'
                        'key',  # e.g. 'sr_id'
                        'value'
                    ])

        # Emit only after every value has been seen, because the account
        # value may arrive after the comment/link values.
        if gold is not None:
            for author in authors:
                yield (author.tid, author.data_type, author.thing_type,
                       author.key, thing_id)

    mr_tools.mr_reduce(process)
Example #3
0
def store_sorts():
    """Run a reducer that inserts every reduced group into Cassandra.

    For each ``(key, vals)`` group supplied by ``mr_tools.mr_reduce`` the
    values are converted to a dict and inserted under ``key`` into the column
    family behind ``CommentSortsCache``, using write consistency level
    ``CL.ANY``.  The reducer itself emits nothing (it returns an empty list).

    Returns whatever ``mr_tools.mr_reduce`` returns.
    """
    from r2.models import CommentSortsCache
    from r2.lib.db.tdb_cassandra import CL

    # Write straight to the column family, bypassing the tdb_cassandra layer.
    column_family = CommentSortsCache._cf

    def _insert_row(key, vals):
        columns = dict(vals)

        # The values have already been serialised to strings.
        column_family.insert(key, columns, write_consistency_level=CL.ANY)

        return []

    return mr_tools.mr_reduce(_insert_row)
Example #4
0
def store_sorts():
    """Run a reducer that inserts every reduced group into Cassandra.

    For each ``(key, vals)`` group supplied by ``mr_tools.mr_reduce`` the
    values are converted to a dict and inserted under ``key`` into the column
    family behind ``CommentSortsCache``, using write consistency level
    ``CL.ANY``.  The reducer itself emits nothing (it returns an empty list).

    Returns whatever ``mr_tools.mr_reduce`` returns.
    """
    from r2.models import CommentSortsCache
    from r2.lib.db.tdb_cassandra import CL

    # Write straight to the column family, bypassing the tdb_cassandra layer.
    column_family = CommentSortsCache._cf

    def _insert_row(key, vals):
        columns = dict(vals)

        # The values have already been serialised to strings.
        column_family.insert(key, columns, write_consistency_level=CL.ANY)

        return []

    return mr_tools.mr_reduce(_insert_row)