def get_data(table, thing_id):
    r, single = fetch_query(table, table.c.thing_id, thing_id)

    #if single, only return one storage, otherwise make a dict
    res = storage() if single else {}
    for row in r:
        val = db2py(row.value, row.kind)
        stor = res if single else res.setdefault(row.thing_id, storage())
        stor[row.key] = val
    return res
def describe_jobflows_cached(emr_connection):
    """Return a list of jobflows on this connection.

    It's good to cache this information because hitting AWS too often can
    result in rate limiting, and it's not particularly detrimental to have
    slightly out of date information in most cases. Non-running jobflows and
    information we don't need are discarded to reduce the size of cached
    data.

    """
    jobflows = emr_connection.describe_jobflows()

    r_jobflows = []
    for jf in jobflows:
        #skip jobflows that aren't live and haven't started a step recently
        #(assumes every jobflow has at least one step)
        d = jf.steps[-1].creationdatetime.split('T')[0]
        last_step_start = datetime.datetime.strptime(d, '%Y-%m-%d').date()
        now = datetime.datetime.now().date()
        if (jf.state not in LIVE_STATES and
                now - last_step_start > datetime.timedelta(2)):
            continue

        #keep only the fields we need
        r_jf = storage(name=jf.name,
                       jobflowid=jf.jobflowid,
                       state=jf.state)

        r_bootstrapactions = []
        for i in jf.bootstrapactions:
            s = storage(name=i.name,
                        path=i.path,
                        args=[a.value for a in i.args])
            r_bootstrapactions.append(s)
        r_jf['bootstrapactions'] = r_bootstrapactions

        r_steps = []
        for i in jf.steps:
            s = storage(name=i.name,
                        state=i.state,
                        jar=i.jar,
                        args=[a.value for a in i.args])
            r_steps.append(s)
        r_jf['steps'] = r_steps

        r_instancegroups = []
        for i in jf.instancegroups:
            s = storage(name=i.name,
                        instancegroupid=i.instancegroupid,
                        instancerequestcount=i.instancerequestcount)
            r_instancegroups.append(s)
        r_jf['instancegroups'] = r_instancegroups

        r_jobflows.append(r_jf)
    return r_jobflows
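# Usage sketch (not part of the original module): consuming the trimmed
# summaries. Assumes the same LIVE_STATES list referenced above; only the
# fields kept by describe_jobflows_cached are available on each storage.
def live_jobflow_ids(emr_connection):
    jobflows = describe_jobflows_cached(emr_connection)
    return [jf.jobflowid for jf in jobflows if jf.state in LIVE_STATES]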
def get_data(table, thing_id):
    r, single = fetch_query(table, table.c.thing_id, thing_id)

    #if single, only return one storage, otherwise make a dict
    res = storage() if single else {}
    for row in r:
        val = db2py(row.value, row.kind)
        stor = res if single else res.setdefault(row.thing_id, storage())
        if single and row.thing_id != thing_id:
            raise ValueError("tdb_sql.py: there's shit in the plumbing. "
                             "got %s, wanted %s" % (row.thing_id, thing_id))
        stor[row.key] = val
    return res
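# Usage sketch (not in the original source): get_data follows the module's
# single-vs-batch convention set by fetch_query. The 'link' type name and
# the ids are hypothetical, and the .tables layout assumes the
# build_thing_tables() variant below that stores (thing_table, data_table)
# pairs.
def _example_get_data():
    data_table = types_name['link'].tables[0][1]
    one = get_data(data_table, 1)        #single id: one storage of key -> val
    many = get_data(data_table, [1, 2])  #list of ids: dict of id -> storage
    return one, many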
def build_thing_tables():
    for name, engines in dbm.things_iter():
        type_id = check_type(type_table, name, dict(name=name))

        tables = []
        for engine in engines:
            metadata = make_metadata(engine)

            #make thing table
            thing_table = get_thing_table(metadata, name)
            create_table(thing_table, index_commands(thing_table, 'thing'))

            #make data tables
            data_table = get_data_table(metadata, name)
            create_table(data_table, index_commands(data_table, 'data'))

            tables.append((thing_table, data_table))

        thing = storage(type_id=type_id,
                        name=name,
                        avoid_master_reads=dbm.avoid_master_reads.get(name),
                        tables=tables)
        types_id[type_id] = thing
        types_name[name] = thing
def build_fn(row):
    # return Storage objects with just the requested props
    props = {}
    for prop in ret_props:
        db_prop = prop[1:]  # column name doesn't have the _ prefix
        props[prop] = getattr(row, db_prop)
    return storage(**props)
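# Sketch (not in the original source): build_fn closes over ret_props, a
# list of underscore-prefixed prop names supplied by its caller; stripping
# the underscore yields the column attribute. A hypothetical stand-alone
# version of the same logic:
def _example_build_fn():
    ret_props = ['_ups', '_downs']   #hypothetical requested props
    row = storage(ups=1, downs=0)    #stand-in for a result row
    props = dict((p, getattr(row, p[1:])) for p in ret_props)
    return storage(**props)          #-> storage with _ups=1, _downs=0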
def build_thing_tables():
    for name, engines in dbm.things_iter():
        type_id = check_type(type_table,
                             type_table.c.name == name,
                             dict(name=name))

        tables = []
        for engine in engines:
            metadata = make_metadata(engine)

            #make thing table
            thing_table = get_thing_table(metadata, name)
            create_table(thing_table, index_commands(thing_table, 'thing'))

            #make data tables
            data_table = get_data_table(metadata, name)
            create_table(data_table, index_commands(data_table, 'data'))

            tables.append((thing_table, data_table))

        thing = storage(type_id=type_id,
                        name=name,
                        avoid_master_reads=dbm.avoid_master_reads.get(name),
                        tables=tables)
        types_id[type_id] = thing
        types_name[name] = thing
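# Sketch (not in the original source; 'link' is a hypothetical type name):
# after build_thing_tables() runs, types_id and types_name point at the same
# storage object per type, so a type can be looked up by either key.
def _example_type_registries():
    thing = types_name['link']
    assert types_id[thing.type_id] is thing
    return thing.tables   #list of (thing_table, data_table) pairs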
def build_rel_tables():
    for name, type1_name, type2_name, engine in dbm.relations():
        type1_id = types_name[type1_name].type_id
        type2_id = types_name[type2_name].type_id
        type_id = check_type(rel_type_table,
                             rel_type_table.c.name == name,
                             dict(name=name,
                                  type1_id=type1_id,
                                  type2_id=type2_id))

        metadata = make_metadata(engine)

        #relation table
        rel_table = get_rel_table(metadata, name)
        create_table(rel_table, index_commands(rel_table, 'rel'))

        #make thing1 table if required
        if engine == thing_engines[type1_name]:
            rel_t1_table = types_name[type1_name].thing_table
        else:
            #need to maintain an extra thing table?
            if dbm.extra_thing1.get(engine):
                rel_t1_table = get_thing_table(metadata,
                                               'rel_' + name + '_type1')
                create_table(rel_t1_table,
                             index_commands(rel_t1_table, 'thing'))
                extra_thing_tables.setdefault(type_id, set()).add(rel_t1_table)
            else:
                rel_t1_table = get_thing_table(metadata, type1_name)

        #make thing2 table if required
        if type1_id == type2_id:
            rel_t2_table = rel_t1_table
        elif engine == thing_engines[type2_name]:
            rel_t2_table = types_name[type2_name].thing_table
        else:
            if dbm.extra_thing2.get(engine):
                rel_t2_table = get_thing_table(metadata,
                                               'rel_' + name + '_type2')
                create_table(rel_t2_table,
                             index_commands(rel_t2_table, 'thing'))
                extra_thing_tables.setdefault(type_id, set()).add(rel_t2_table)
            else:
                rel_t2_table = get_thing_table(metadata, type2_name)

        #build the data
        rel_data_table = get_data_table(metadata, 'rel_' + name)
        create_table(rel_data_table, index_commands(rel_data_table, 'data'))

        rel = storage(type_id=type_id,
                      type1_id=type1_id,
                      type2_id=type2_id,
                      name=name,
                      rel_table=(rel_table,
                                 rel_t1_table,
                                 rel_t2_table,
                                 rel_data_table))
        rel_types_id[type_id] = rel
        rel_types_name[name] = rel
def get_rel(rel_type_id, rel_id):
    r_table = get_rel_table(rel_type_id)[0]
    r, single = fetch_query(r_table, r_table.c.rel_id, rel_id)

    res = None if single else {}
    for row in r:
        stor = storage(thing1_id=row.thing1_id,
                       thing2_id=row.thing2_id,
                       name=row.name,
                       date=row.date)
        if single:
            res = stor
        else:
            res[row.rel_id] = stor
    return res
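# Usage sketch (hypothetical type and rel ids): get_rel mirrors the
# single-vs-batch convention of get_data and get_thing.
def _example_get_rel():
    one = get_rel(1, 5)        #single rel_id: one storage
    many = get_rel(1, [5, 6])  #list of rel_ids: dict of rel_id -> storage
    return one, many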
def get_thing(type_id, thing_id):
    table = types_id[type_id].thing_table
    r, single = fetch_query(table, table.c.thing_id, thing_id)

    #if single, only return one storage, otherwise make a dict
    res = None if single else {}
    for row in r:
        stor = storage(ups=row.ups,
                       downs=row.downs,
                       date=row.date,
                       deleted=row.deleted,
                       spam=row.spam)
        if single:
            res = stor
        else:
            res[row.thing_id] = stor
    return res
def get_thing(type_id, thing_id):
    table = get_thing_table(type_id)[0]
    r, single = fetch_query(table, table.c.thing_id, thing_id)

    #if single, only return one storage, otherwise make a dict
    res = None if single else {}
    for row in r:
        stor = storage(ups=row.ups,
                       downs=row.downs,
                       date=row.date,
                       deleted=row.deleted,
                       spam=row.spam)
        if single:
            res = stor
            #check that we got what we asked for
            if row.thing_id != thing_id:
                raise ValueError("tdb_sql.py: there's shit in the plumbing. "
                                 "got %s, wanted %s"
                                 % (row.thing_id, thing_id))
        else:
            res[row.thing_id] = stor
    return res
def build_rel_tables():
    for name, (type1_name, type2_name, engines) in dbm.rels_iter():
        type1_id = types_name[type1_name].type_id
        type2_id = types_name[type2_name].type_id
        type_id = check_type(rel_type_table,
                             rel_type_table.c.name == name,
                             dict(name=name,
                                  type1_id=type1_id,
                                  type2_id=type2_id))

        tables = []
        for engine in engines:
            metadata = make_metadata(engine)

            #relation table
            rel_table = get_rel_table(metadata, name)
            create_table(rel_table, index_commands(rel_table, 'rel'))

            #make thing tables
            rel_t1_table = get_thing_table(metadata, type1_name)
            if type1_name == type2_name:
                rel_t2_table = rel_t1_table
            else:
                rel_t2_table = get_thing_table(metadata, type2_name)

            #build the data
            rel_data_table = get_data_table(metadata, 'rel_' + name)
            create_table(rel_data_table,
                         index_commands(rel_data_table, 'data'))

            tables.append((rel_table,
                           rel_t1_table,
                           rel_t2_table,
                           rel_data_table))

        rel = storage(type_id=type_id,
                      type1_id=type1_id,
                      type2_id=type2_id,
                      avoid_master_reads=dbm.avoid_master_reads.get(name),
                      name=name,
                      tables=tables)
        rel_types_id[type_id] = rel
        rel_types_name[name] = rel
def build_rel_tables():
    for name, (type1_name, type2_name, engines) in dbm.rels_iter():
        type1_id = types_name[type1_name].type_id
        type2_id = types_name[type2_name].type_id
        type_id = check_type(rel_type_table,
                             name,
                             dict(name=name,
                                  type1_id=type1_id,
                                  type2_id=type2_id))

        tables = []
        for engine in engines:
            metadata = make_metadata(engine)

            #relation table
            rel_table = get_rel_table(metadata, name)
            create_table(rel_table, index_commands(rel_table, 'rel'))

            #make thing tables
            rel_t1_table = get_thing_table(metadata, type1_name)
            if type1_name == type2_name:
                rel_t2_table = rel_t1_table
            else:
                rel_t2_table = get_thing_table(metadata, type2_name)

            #build the data
            rel_data_table = get_data_table(metadata, 'rel_' + name)
            create_table(rel_data_table,
                         index_commands(rel_data_table, 'data'))

            tables.append((rel_table,
                           rel_t1_table,
                           rel_t2_table,
                           rel_data_table))

        rel = storage(type_id=type_id,
                      type1_id=type1_id,
                      type2_id=type2_id,
                      avoid_master_reads=dbm.avoid_master_reads.get(name),
                      name=name,
                      tables=tables)
        rel_types_id[type_id] = rel
        rel_types_name[name] = rel
def build_thing_tables():
    for name, thing_engine, data_engine in dbm.things():
        type_id = check_type(type_table,
                             type_table.c.name == name,
                             dict(name=name))

        thing_engines[name] = thing_engine

        #make thing table
        thing_table = get_thing_table(make_metadata(thing_engine), name)
        create_table(thing_table, index_commands(thing_table, 'thing'))

        #make data tables
        data_metadata = make_metadata(data_engine)
        data_table = get_data_table(data_metadata, name)
        create_table(data_table, index_commands(data_table, 'data'))

        #do we need another thing table on the data engine?
        if thing_engine == data_engine:
            data_thing_table = thing_table
        else:
            #different engine: do we need to maintain the extra table?
            if dbm.extra_data.get(data_engine):
                data_thing_table = get_thing_table(data_metadata,
                                                   'data_' + name)
                extra_thing_tables.setdefault(type_id,
                                              set()).add(data_thing_table)
                create_table(data_thing_table,
                             index_commands(data_thing_table, 'thing'))
            else:
                data_thing_table = get_thing_table(data_metadata, name)

        thing = storage(type_id=type_id,
                        name=name,
                        thing_table=thing_table,
                        data_table=(data_table, data_thing_table))
        types_id[type_id] = thing
        types_name[name] = thing
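# Sketch (not in the original source; names hypothetical): the point of the
# thing/data engine split above is that the data engine may carry its own
# copy of the thing table.
def _example_engine_split(name):
    thing = types_name[name]
    data_table, data_thing_table = thing.data_table
    #same engine: data_thing_table is thing.thing_table itself; different
    #engines: it's either a maintained copy ('data_' + name, registered in
    #extra_thing_tables) or an unmaintained reference to the same table name
    return data_table, data_thing_table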
def get_thing(type_id, thing_id):
    table = types_id[type_id].thing_table
    r, single = fetch_query(table, table.c.thing_id, thing_id)

    #if single, only return one storage, otherwise make a dict
    res = None if single else {}
    for row in r:
        kwargs = {'ups': row.ups,
                  'downs': row.downs,
                  'date': row.date,
                  'deleted': row.deleted,
                  'spam': row.spam}
        #links and comments also carry the karma of their descendants
        if type_id in (types_name["link"].type_id,
                       types_name["comment"].type_id):
            kwargs['descendant_karma'] = row.descendant_karma
        stor = storage(**kwargs)

        if single:
            res = stor
        else:
            res[row.thing_id] = stor
    return res
from r2.lib.utils import storage, storify, iters, Results, tup, TransSet
from r2.config.databases import dbm, tz
from pylons import g
import operators
import sqlalchemy as sa
from sqlalchemy.databases import postgres
from datetime import datetime
import cPickle as pickle
from copy import deepcopy
import logging

log_format = logging.Formatter('sql: %(message)s')

settings = storage()
settings.DEBUG = g.debug
settings.DB_CREATE_TABLES = True
settings.DB_APP_NAME = 'reddit'

max_val_len = 1000

transactions = TransSet()

BigInteger = postgres.PGBigInteger

def alias_generator():
    n = 1
    while True:
        yield 'alias_%d' % n
        n += 1
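# Usage sketch: alias_generator() yields an unbounded stream of unique alias
# names for SQL self-joins.
def _example_alias_generator():
    aliases = alias_generator()
    first = aliases.next()    #-> 'alias_1'
    second = aliases.next()   #-> 'alias_2'
    return first, second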