def join_authors():
    """Run a reducer that emits author rows for keys that have an account row.

    For every key handed to the reducer, each value containing 'comment'
    or 'link' is parsed as an author record, and a value containing
    'account' is parsed as the gold record (the last one seen wins).
    Author tuples are emitted only when a gold record was seen for that
    key; otherwise the key produces no output.

    The reducer is passed to ``mr_tools.mr_reduce``; this function itself
    returns nothing.
    """
    def process(thing_id, vals):
        """Yield (tid, data_type, thing_type, key, thing_id) per author."""
        authors = []
        gold = None

        for val in vals:
            if ('comment' in val) or ('link' in val):
                authors.append(mr_tools.format_dataspec(val, [
                    'data_type',   # e.g. 'data'
                    'thing_type',  # e.g. 'link'
                    'key',         # e.g. 'sr_id'
                    'tid',
                ]))
            elif 'account' in val:
                # Only the presence of this record matters below; its
                # fields are never read.
                gold = mr_tools.format_dataspec(val, [
                    'data_type',   # e.g. 'data'
                    'thing_type',  # e.g. 'link'
                    'key',         # e.g. 'sr_id'
                    'value',
                ])

        if gold is not None:
            for author in authors:
                yield (author.tid, author.data_type, author.thing_type,
                       author.key, thing_id)

    mr_tools.mr_reduce(process)
def join_authors():
    """Run a reducer that emits author rows for keys that have an account row.

    Values containing 'comment' or 'link' are parsed as author records;
    a value containing 'account' is parsed as the gold record (a later
    one replaces an earlier one). A key yields output only if it had a
    gold record, in which case one tuple per author record is emitted.

    The reducer is passed to ``mr_tools.mr_reduce``; this function itself
    returns nothing.
    """
    def process(thing_id, vals):
        """Yield (tid, data_type, thing_type, key, thing_id) per author."""
        authors = []
        gold = None

        for val in vals:
            if ('comment' in val) or ('link' in val):
                author = mr_tools.format_dataspec(
                    val,
                    [
                        'data_type',   # e.g. 'data'
                        'thing_type',  # e.g. 'link'
                        'key',         # e.g. 'sr_id'
                        'tid',
                    ])
                authors.append(author)
            elif 'account' in val:
                # Kept only as a "gold record was seen" marker; its
                # fields are not used afterwards.
                gold = mr_tools.format_dataspec(
                    val,
                    [
                        'data_type',   # e.g. 'data'
                        'thing_type',  # e.g. 'link'
                        'key',         # e.g. 'sr_id'
                        'value',
                    ])

        # No gold record for this key: emit nothing.
        if gold is None:
            return

        for author in authors:
            yield (author.tid, author.data_type, author.thing_type,
                   author.key, thing_id)

    mr_tools.mr_reduce(process)
def store_sorts():
    """Run a reducer that inserts each key's values into CommentSortsCache.

    Every (key, vals) group is written as one insert on the
    CommentSortsCache column family with write consistency ``CL.ANY``.
    The reducer itself emits no rows. The result of
    ``mr_tools.mr_reduce`` is returned to the caller.
    """
    from r2.models import CommentSortsCache
    from r2.lib.db.tdb_cassandra import CL

    # Talk to the column family directly instead of going through the
    # tdb_cassandra layer.
    column_family = CommentSortsCache._cf

    def _write_row(row_key, columns):
        # The values have already been serialised to strings upstream.
        column_family.insert(row_key, dict(columns),
                             write_consistency_level=CL.ANY)
        # Nothing to emit for this key.
        return []

    return mr_tools.mr_reduce(_write_row)