Example #1
0
def get_data(table, thing_id):
    """Load the key/value data rows matching ``thing_id`` from ``table``.

    ``fetch_query`` hands back the matching rows and a ``single`` flag.
    When the flag is set, all rows are folded into one storage keyed by
    ``row.key``; otherwise a plain dict is returned that maps each
    ``row.thing_id`` to its own storage.  Every value is converted with
    ``db2py(row.value, row.kind)`` before it is stored.
    """
    rows, single = fetch_query(table, table.c.thing_id, thing_id)

    if single:
        # single mode: everything lands in one flat storage
        data = storage()
        for row in rows:
            data[row.key] = db2py(row.value, row.kind)
        return data

    # otherwise: one storage per thing_id, created on first sight
    by_thing = {}
    for row in rows:
        value = db2py(row.value, row.kind)
        by_thing.setdefault(row.thing_id, storage())[row.key] = value
    return by_thing
Example #2
0
def get_data(table, thing_id):
    """Collect converted data values for ``thing_id`` out of ``table``.

    Returns a single storage of ``key -> value`` when ``fetch_query``
    reports ``single``; otherwise returns a dict of
    ``thing_id -> storage``.  Raw values go through
    ``db2py(row.value, row.kind)`` first.
    """
    rows, single = fetch_query(table, table.c.thing_id, thing_id)

    # the container type depends on the mode reported by fetch_query
    if single:
        result = storage()
    else:
        result = {}

    for row in rows:
        converted = db2py(row.value, row.kind)
        if single:
            target = result
        else:
            target = result.setdefault(row.thing_id, storage())
        target[row.key] = converted
    return result
Example #3
0
def describe_jobflows_cached(emr_connection):
    """Return a list of jobflows on this connection.

    It's good to cache this information because hitting AWS too often can
    result in rate limiting, and it's not particularly detrimental to have
    slightly out of date information in most cases. Non-running jobflows and
    information we don't need are discarded to reduce the size of cached data.

    A jobflow is dropped when its state is not in LIVE_STATES and its last
    step was created more than two days ago.  A jobflow with no steps has no
    such timestamp, so it is never dropped for age (indexing ``steps[-1]``
    on it used to raise IndexError).

    :param emr_connection: object providing ``describe_jobflows()``.
    :returns: list of storage objects holding ``name``, ``jobflowid``,
        ``state``, ``bootstrapactions``, ``steps`` and ``instancegroups``.
    """

    jobflows = emr_connection.describe_jobflows()

    # loop invariants: computed once instead of once per jobflow
    today = datetime.datetime.now().date()
    max_age = datetime.timedelta(2)

    r_jobflows = []
    for jf in jobflows:
        # skip old not live jobflows; the date is only parsed when it can
        # actually influence the decision
        if jf.state not in LIVE_STATES and jf.steps:
            day = jf.steps[-1].creationdatetime.split('T')[0]
            last_step_start = datetime.datetime.strptime(
                day, '%Y-%m-%d').date()
            if today - last_step_start > max_age:
                continue

        # keep only fields we need
        r_jf = storage(name=jf.name,
                       jobflowid=jf.jobflowid,
                       state=jf.state)
        r_jf['bootstrapactions'] = [
            storage(name=i.name,
                    path=i.path,
                    args=[a.value for a in i.args])
            for i in jf.bootstrapactions]
        r_jf['steps'] = [
            storage(name=i.name,
                    state=i.state,
                    jar=i.jar,
                    args=[a.value for a in i.args])
            for i in jf.steps]
        r_jf['instancegroups'] = [
            storage(name=i.name,
                    instancegroupid=i.instancegroupid,
                    instancerequestcount=i.instancerequestcount)
            for i in jf.instancegroups]
        r_jobflows.append(r_jf)
    return r_jobflows
Example #4
0
def get_data(table, thing_id):
    """Load the key/value data rows matching ``thing_id`` from ``table``.

    ``fetch_query`` returns the rows plus a ``single`` flag.  In single
    mode one storage of ``key -> value`` is returned; otherwise a dict of
    ``thing_id -> storage``.  Values are converted with
    ``db2py(row.value, row.kind)``.

    :raises ValueError: in single mode, when a returned row's ``thing_id``
        differs from the requested ``thing_id``.
    """
    r, single = fetch_query(table, table.c.thing_id, thing_id)

    #if single, only return one storage, otherwise make a dict
    res = storage() if single else {}
    for row in r:
        val = db2py(row.value, row.kind)
        stor = res if single else res.setdefault(row.thing_id, storage())
        # sanity check: a single-id query must only return rows for that id
        if single and row.thing_id != thing_id:
            # call-style raise is valid on Python 2 and Python 3 alike;
            # the old "raise E, value" form is a SyntaxError on Python 3
            raise ValueError("tdb_sql.py: there's shit in the plumbing." +
                             " got %s, wanted %s" % (row.thing_id, thing_id))
        stor[row.key] = val

    return res
Example #5
0
def get_data(table, thing_id):
    """Collect converted data values for ``thing_id`` out of ``table``.

    Returns one storage of ``key -> value`` when ``fetch_query`` reports
    ``single``, otherwise a dict of ``thing_id -> storage``.  Raw values
    are passed through ``db2py(row.value, row.kind)``.

    :raises ValueError: in single mode, when a row comes back for a
        ``thing_id`` other than the one requested.
    """
    r, single = fetch_query(table, table.c.thing_id, thing_id)

    #if single, only return one storage, otherwise make a dict
    res = storage() if single else {}
    for row in r:
        val = db2py(row.value, row.kind)
        stor = res if single else res.setdefault(row.thing_id, storage())
        # in single mode every row must belong to the requested thing
        if single and row.thing_id != thing_id:
            # exception is instantiated explicitly: the comma form of
            # raise only parses on Python 2
            raise ValueError("tdb_sql.py: there's shit in the plumbing."
                             + " got %s, wanted %s" % (row.thing_id,
                                                       thing_id))
        stor[row.key] = val

    return res
Example #6
0
def describe_jobflows_cached(emr_connection):
    """Return a list of jobflows on this connection.

    It's good to cache this information because hitting AWS too often can
    result in rate limiting, and it's not particularly detrimental to have
    slightly out of date information in most cases. Non-running jobflows and
    information we don't need are discarded to reduce the size of cached data.

    Only jobflows that are not in LIVE_STATES *and* whose last step was
    created more than two days ago are discarded.  Jobflows without any
    steps carry no step timestamp and are therefore kept (the unguarded
    ``steps[-1]`` lookup used to raise IndexError for them).

    :param emr_connection: object providing ``describe_jobflows()``.
    :returns: list of storage objects holding ``name``, ``jobflowid``,
        ``state``, ``bootstrapactions``, ``steps`` and ``instancegroups``.
    """

    jobflows = emr_connection.describe_jobflows()

    # neither value changes between jobflows, so compute them once
    today = datetime.datetime.now().date()
    max_age = datetime.timedelta(2)

    r_jobflows = []
    for jf in jobflows:
        # skip old not live jobflows (step-less ones cannot be dated)
        if jf.state not in LIVE_STATES and jf.steps:
            day = jf.steps[-1].creationdatetime.split('T')[0]
            last_step_start = datetime.datetime.strptime(
                day, '%Y-%m-%d').date()
            if today - last_step_start > max_age:
                continue

        # keep only fields we need
        r_jf = storage(name=jf.name, jobflowid=jf.jobflowid, state=jf.state)
        r_jf['bootstrapactions'] = [
            storage(name=i.name,
                    path=i.path,
                    args=[a.value for a in i.args])
            for i in jf.bootstrapactions]
        r_jf['steps'] = [
            storage(name=i.name,
                    state=i.state,
                    jar=i.jar,
                    args=[a.value for a in i.args])
            for i in jf.steps]
        r_jf['instancegroups'] = [
            storage(name=i.name,
                    instancegroupid=i.instancegroupid,
                    instancerequestcount=i.instancerequestcount)
            for i in jf.instancegroups]
        r_jobflows.append(r_jf)
    return r_jobflows
Example #7
0
def build_thing_tables():
    """Set up the thing/data tables of every thing type and register them.

    For each ``(name, engines)`` pair from ``dbm.things_iter()`` the type id
    is obtained from ``check_type``.  For every engine a thing table and a
    data table are built and handed to ``create_table`` together with their
    ``index_commands``.  The resulting description is stored under both
    ``types_id[type_id]`` and ``types_name[name]``.
    """
    for name, engines in dbm.things_iter():
        type_id = check_type(type_table, name, {'name': name})

        table_pairs = []
        for engine in engines:
            metadata = make_metadata(engine)

            # thing table first ...
            thing_table = get_thing_table(metadata, name)
            thing_indexes = index_commands(thing_table, 'thing')
            create_table(thing_table, thing_indexes)

            # ... then the matching data table
            data_table = get_data_table(metadata, name)
            data_indexes = index_commands(data_table, 'data')
            create_table(data_table, data_indexes)

            table_pairs.append((thing_table, data_table))

        type_info = storage(
            type_id=type_id,
            name=name,
            avoid_master_reads=dbm.avoid_master_reads.get(name),
            tables=table_pairs)

        # the same object is reachable by id and by name
        types_id[type_id] = types_name[name] = type_info
Example #8
0
 def build_fn(row):
     """Build a storage holding only the properties named in ``ret_props``.

     Each requested property name starts with a one-character prefix
     that the column name lacks, so the first character is dropped
     before the attribute is read from ``row``.
     """
     return storage(**dict((prop, getattr(row, prop[1:]))
                           for prop in ret_props))
Example #9
0
def build_thing_tables():
    """Set up the thing/data tables of every thing type and register them.

    Iterates ``dbm.things_iter()``; for each ``(name, engines)`` pair the
    type id comes from ``check_type`` (matched on ``type_table.c.name``).
    Per engine, a thing table and a data table are passed to
    ``create_table`` with their ``index_commands``.  The description is
    stored in ``types_id`` and ``types_name``.
    """
    for name, engines in dbm.things_iter():
        name_matches = type_table.c.name == name
        type_id = check_type(type_table, name_matches, {'name': name})

        table_pairs = []
        for engine in engines:
            metadata = make_metadata(engine)

            # thing table and its indexes
            thing_table = get_thing_table(metadata, name)
            create_table(thing_table, index_commands(thing_table, 'thing'))

            # data table and its indexes
            data_table = get_data_table(metadata, name)
            create_table(data_table, index_commands(data_table, 'data'))

            table_pairs.append((thing_table, data_table))

        type_info = storage(
            type_id=type_id,
            name=name,
            avoid_master_reads=dbm.avoid_master_reads.get(name),
            tables=table_pairs)

        # register under both lookup keys
        types_id[type_id] = types_name[name] = type_info
Example #10
0
 def build_fn(row):
     """Return a storage restricted to the properties in ``ret_props``.

     The storage keys keep the requested names; the row attribute read
     for each one is that name without its first character (the column
     name has no ``_`` prefix).
     """
     pairs = [(wanted, getattr(row, wanted[1:])) for wanted in ret_props]
     return storage(**dict(pairs))
Example #11
0
def build_rel_tables():
    """Set up the tables of every relation type and register them.

    For each ``(name, type1_name, type2_name, engine)`` from
    ``dbm.relations()`` this creates the relation table and its data
    table, and picks a thing table for each side of the relation.  A side
    reuses the type's own thing table when the relation shares that type's
    engine; otherwise it gets a table built on the relation's metadata,
    which is additionally created and recorded in ``extra_thing_tables``
    when ``dbm.extra_thing1`` / ``dbm.extra_thing2`` has an entry for the
    engine.  The description ends up in ``rel_types_id`` and
    ``rel_types_name``.
    """
    for name, type1_name, type2_name, engine in dbm.relations():
        type1_id = types_name[type1_name].type_id
        type2_id = types_name[type2_name].type_id
        type_id = check_type(
            rel_type_table,
            rel_type_table.c.name == name,
            dict(name=name, type1_id=type1_id, type2_id=type2_id))

        metadata = make_metadata(engine)

        # the relation table itself
        rel_table = get_rel_table(metadata, name)
        create_table(rel_table, index_commands(rel_table, 'rel'))

        # thing table for side 1
        if engine == thing_engines[type1_name]:
            rel_t1_table = types_name[type1_name].thing_table
        elif dbm.extra_thing1.get(engine):
            # different engine, and an extra thing table is maintained
            rel_t1_table = get_thing_table(metadata,
                                           'rel_' + name + '_type1')
            create_table(rel_t1_table,
                         index_commands(rel_t1_table, 'thing'))
            extra_thing_tables.setdefault(type_id, set()).add(rel_t1_table)
        else:
            rel_t1_table = get_thing_table(metadata, type1_name)

        # thing table for side 2 (shared with side 1 for same-type rels)
        if type1_id == type2_id:
            rel_t2_table = rel_t1_table
        elif engine == thing_engines[type2_name]:
            rel_t2_table = types_name[type2_name].thing_table
        elif dbm.extra_thing2.get(engine):
            rel_t2_table = get_thing_table(metadata,
                                           'rel_' + name + '_type2')
            create_table(rel_t2_table,
                         index_commands(rel_t2_table, 'thing'))
            extra_thing_tables.setdefault(type_id, set()).add(rel_t2_table)
        else:
            rel_t2_table = get_thing_table(metadata, type2_name)

        # data table of the relation
        rel_data_table = get_data_table(metadata, 'rel_' + name)
        create_table(rel_data_table, index_commands(rel_data_table, 'data'))

        table_group = (rel_table, rel_t1_table, rel_t2_table, rel_data_table)
        rel_info = storage(type_id=type_id,
                           type1_id=type1_id,
                           type2_id=type2_id,
                           name=name,
                           rel_table=table_group)

        rel_types_id[type_id] = rel_types_name[name] = rel_info
Example #12
0
def build_rel_tables():
    """Create and register the tables behind every relation type.

    Walks ``dbm.relations()``.  Per relation it creates the relation table
    and the relation data table, and chooses a thing table for each side:
    the type's own table when engines match, otherwise a table on the
    relation's metadata (created and tracked in ``extra_thing_tables``
    when ``dbm.extra_thing1`` / ``dbm.extra_thing2`` has an entry for the
    engine).  The result is stored in ``rel_types_id`` and
    ``rel_types_name``.
    """
    def make(table, kind):
        # hand a table to create_table together with its index commands
        create_table(table, index_commands(table, kind))

    for name, type1_name, type2_name, engine in dbm.relations():
        type1_id = types_name[type1_name].type_id
        type2_id = types_name[type2_name].type_id
        type_id = check_type(
            rel_type_table,
            rel_type_table.c.name == name,
            dict(name=name, type1_id=type1_id, type2_id=type2_id))

        metadata = make_metadata(engine)

        # relation table
        rel_table = get_rel_table(metadata, name)
        make(rel_table, 'rel')

        # side 1
        if engine == thing_engines[type1_name]:
            rel_t1_table = types_name[type1_name].thing_table
        elif dbm.extra_thing1.get(engine):
            # an extra thing table has to be maintained on this engine
            rel_t1_table = get_thing_table(metadata, 'rel_' + name + '_type1')
            make(rel_t1_table, 'thing')
            extra_thing_tables.setdefault(type_id, set()).add(rel_t1_table)
        else:
            rel_t1_table = get_thing_table(metadata, type1_name)

        # side 2
        if type1_id == type2_id:
            rel_t2_table = rel_t1_table
        elif engine == thing_engines[type2_name]:
            rel_t2_table = types_name[type2_name].thing_table
        elif dbm.extra_thing2.get(engine):
            rel_t2_table = get_thing_table(metadata, 'rel_' + name + '_type2')
            make(rel_t2_table, 'thing')
            extra_thing_tables.setdefault(type_id, set()).add(rel_t2_table)
        else:
            rel_t2_table = get_thing_table(metadata, type2_name)

        # relation data table
        rel_data_table = get_data_table(metadata, 'rel_' + name)
        make(rel_data_table, 'data')

        rel_info = storage(
            type_id=type_id,
            type1_id=type1_id,
            type2_id=type2_id,
            name=name,
            rel_table=(rel_table, rel_t1_table, rel_t2_table,
                       rel_data_table))

        rel_types_id[type_id] = rel_info
        rel_types_name[name] = rel_info
Example #13
0
def get_rel(rel_type_id, rel_id):
    """Fetch relation rows for ``rel_id`` from the relation table.

    In single mode (as reported by ``fetch_query``) the storage built from
    the last row is returned, or ``None`` when there are no rows.
    Otherwise a dict of ``rel_id -> storage`` is returned.  Each storage
    carries ``thing1_id``, ``thing2_id``, ``name`` and ``date``.
    """
    rel_table = get_rel_table(rel_type_id)[0]
    rows, single = fetch_query(rel_table, rel_table.c.rel_id, rel_id)

    result = None if single else {}
    for row in rows:
        entry = storage(
            thing1_id=row.thing1_id,
            thing2_id=row.thing2_id,
            name=row.name,
            date=row.date,
        )
        if not single:
            result[row.rel_id] = entry
        else:
            result = entry
    return result
Example #14
0
def get_rel(rel_type_id, rel_id):
    """Look up one relation, or several, by ``rel_id``.

    Returns a storage (``None`` if nothing matched) when ``fetch_query``
    flags the query as single, else a dict mapping each ``row.rel_id`` to
    a storage with ``thing1_id``, ``thing2_id``, ``name`` and ``date``.
    """
    def as_storage(row):
        # copy just the relation columns out of the row
        return storage(thing1_id=row.thing1_id,
                       thing2_id=row.thing2_id,
                       name=row.name,
                       date=row.date)

    rel_table = get_rel_table(rel_type_id)[0]
    rows, single = fetch_query(rel_table, rel_table.c.rel_id, rel_id)

    if single:
        found = None
        for row in rows:
            found = as_storage(row)
        return found

    by_id = {}
    for row in rows:
        entry = as_storage(row)
        by_id[row.rel_id] = entry
    return by_id
Example #15
0
def get_thing(type_id, thing_id):
    """Fetch the core columns of the thing(s) matching ``thing_id``.

    The thing table is taken from ``types_id[type_id]``.  In single mode
    the storage for the last row is returned (``None`` when no row
    matched); otherwise a dict of ``thing_id -> storage``.  Each storage
    holds ``ups``, ``downs``, ``date``, ``deleted`` and ``spam``.
    """
    table = types_id[type_id].thing_table
    rows, single = fetch_query(table, table.c.thing_id, thing_id)

    # None until a row shows up in single mode, a dict otherwise
    result = None if single else {}
    for row in rows:
        props = storage(ups=row.ups,
                        downs=row.downs,
                        date=row.date,
                        deleted=row.deleted,
                        spam=row.spam)
        if not single:
            result[row.thing_id] = props
        else:
            result = props
    return result
Example #16
0
def get_thing(type_id, thing_id):
    """Look up one thing, or several, in the type's thing table.

    Returns a storage (``None`` if nothing matched) when ``fetch_query``
    flags the query as single, else a dict keyed by ``row.thing_id``.
    Every storage contains ``ups``, ``downs``, ``date``, ``deleted`` and
    ``spam``.
    """
    def as_storage(row):
        # copy just the thing columns out of the row
        return storage(ups=row.ups,
                       downs=row.downs,
                       date=row.date,
                       deleted=row.deleted,
                       spam=row.spam)

    table = types_id[type_id].thing_table
    rows, single = fetch_query(table, table.c.thing_id, thing_id)

    if single:
        found = None
        for row in rows:
            found = as_storage(row)
        return found

    by_id = {}
    for row in rows:
        entry = as_storage(row)
        by_id[row.thing_id] = entry
    return by_id
Example #17
0
def get_thing(type_id, thing_id):
    """Fetch the core columns of the thing(s) matching ``thing_id``.

    The table queried is the first element of ``get_thing_table(type_id)``.
    In single mode the storage for the matching row is returned (``None``
    when no row matched); otherwise a dict of ``thing_id -> storage``.
    Each storage holds ``ups``, ``downs``, ``date``, ``deleted`` and
    ``spam``.

    :raises ValueError: in single mode, when a returned row's ``thing_id``
        differs from the requested ``thing_id``.
    """
    table = get_thing_table(type_id)[0]
    r, single = fetch_query(table, table.c.thing_id, thing_id)

    # if single, only return one storage, otherwise make a dict
    res = {} if not single else None
    for row in r:
        stor = storage(ups=row.ups,
                       downs=row.downs,
                       date=row.date,
                       deleted=row.deleted,
                       spam=row.spam)
        if single:
            res = stor
            # check that we got what we asked for
            if row.thing_id != thing_id:
                # call-style raise is valid on Python 2 and Python 3; the
                # "raise E, value" form is a SyntaxError on Python 3
                raise ValueError(
                    "tdb_sql.py: there's shit in the plumbing." +
                    " got %s, wanted %s" % (row.thing_id, thing_id))
        else:
            res[row.thing_id] = stor
    return res
Example #18
0
def build_rel_tables():
    """Create and register the tables behind every relation type.

    For each ``name, (type1_name, type2_name, engines)`` from
    ``dbm.rels_iter()`` the type id is obtained from ``check_type``
    (matched on ``rel_type_table.c.name``).  Per engine, the relation
    table and relation data table are passed to ``create_table`` with
    their ``index_commands``, and a thing table is looked up for each side
    (shared when both sides have the same type name).  The description is
    stored in ``rel_types_id`` and ``rel_types_name``.
    """
    for name, (type1_name, type2_name, engines) in dbm.rels_iter():
        type1_id = types_name[type1_name].type_id
        type2_id = types_name[type2_name].type_id
        type_id = check_type(
            rel_type_table,
            rel_type_table.c.name == name,
            dict(name=name, type1_id=type1_id, type2_id=type2_id))

        table_groups = []
        for engine in engines:
            metadata = make_metadata(engine)

            # the relation table itself
            rel_table = get_rel_table(metadata, name)
            create_table(rel_table, index_commands(rel_table, 'rel'))

            # thing tables for both sides of the relation
            t1_table = get_thing_table(metadata, type1_name)
            t2_table = (t1_table if type1_name == type2_name
                        else get_thing_table(metadata, type2_name))

            # data table of the relation
            data_table = get_data_table(metadata, 'rel_' + name)
            create_table(data_table, index_commands(data_table, 'data'))

            table_groups.append((rel_table, t1_table, t2_table, data_table))

        rel_info = storage(
            type_id=type_id,
            type1_id=type1_id,
            type2_id=type2_id,
            avoid_master_reads=dbm.avoid_master_reads.get(name),
            name=name,
            tables=table_groups)

        rel_types_id[type_id] = rel_types_name[name] = rel_info
Example #19
0
def build_rel_tables():
    """Set up the tables of every relation type and register them.

    Iterates ``dbm.rels_iter()``.  The type id comes from ``check_type``,
    called with the relation name.  For every engine the relation table
    and relation data table go to ``create_table`` with their
    ``index_commands``; the two thing tables come from
    ``get_thing_table`` (one shared table when both type names are
    equal).  The description is stored under ``rel_types_id[type_id]``
    and ``rel_types_name[name]``.
    """
    def make(table, kind):
        # hand a table to create_table together with its index commands
        create_table(table, index_commands(table, kind))

    for name, (type1_name, type2_name, engines) in dbm.rels_iter():
        type1_id = types_name[type1_name].type_id
        type2_id = types_name[type2_name].type_id
        type_id = check_type(
            rel_type_table,
            name,
            dict(name=name, type1_id=type1_id, type2_id=type2_id))

        table_groups = []
        for engine in engines:
            metadata = make_metadata(engine)

            # relation table
            rel_table = get_rel_table(metadata, name)
            make(rel_table, 'rel')

            # thing tables
            t1_table = get_thing_table(metadata, type1_name)
            t2_table = (t1_table if type1_name == type2_name
                        else get_thing_table(metadata, type2_name))

            # relation data table
            data_table = get_data_table(metadata, 'rel_' + name)
            make(data_table, 'data')

            table_groups.append((rel_table, t1_table, t2_table, data_table))

        rel_info = storage(
            type_id=type_id,
            type1_id=type1_id,
            type2_id=type2_id,
            avoid_master_reads=dbm.avoid_master_reads.get(name),
            name=name,
            tables=table_groups)

        rel_types_id[type_id] = rel_info
        rel_types_name[name] = rel_info
Example #20
0
def build_thing_tables():
    """Set up the thing and data tables of every thing type.

    For each ``(name, thing_engine, data_engine)`` from ``dbm.things()``
    the thing engine is recorded in ``thing_engines`` and the thing table
    and data table are created on their respective engines.  The data side
    also needs a thing table: the main one when both engines are equal,
    otherwise one built on the data metadata (created and tracked in
    ``extra_thing_tables`` when ``dbm.extra_data`` has an entry for the
    data engine).  The description is stored in ``types_id`` and
    ``types_name``.
    """
    for name, thing_engine, data_engine in dbm.things():
        type_id = check_type(type_table,
                             type_table.c.name == name,
                             {'name': name})

        thing_engines[name] = thing_engine

        # main thing table
        thing_table = get_thing_table(make_metadata(thing_engine), name)
        create_table(thing_table, index_commands(thing_table, 'thing'))

        # data table
        data_metadata = make_metadata(data_engine)
        data_table = get_data_table(data_metadata, name)
        create_table(data_table, index_commands(data_table, 'data'))

        # thing table used alongside the data table
        if thing_engine == data_engine:
            data_thing_table = thing_table
        elif dbm.extra_data.get(data_engine):
            # separate engine, and an extra thing table is maintained there
            data_thing_table = get_thing_table(data_metadata, 'data_' + name)
            extra_thing_tables.setdefault(type_id, set()).add(data_thing_table)
            create_table(data_thing_table,
                         index_commands(data_thing_table, 'thing'))
        else:
            data_thing_table = get_thing_table(data_metadata, name)

        type_info = storage(type_id=type_id,
                            name=name,
                            thing_table=thing_table,
                            data_table=(data_table, data_thing_table))

        types_id[type_id] = types_name[name] = type_info
Example #21
0
def get_thing(type_id, thing_id):
    """Fetch the core columns of the thing(s) matching ``thing_id``.

    Each row becomes a storage with ``ups``, ``downs``, ``date``,
    ``deleted`` and ``spam``; when ``type_id`` is the id of the "link" or
    "comment" type it also carries ``descendant_karma``.  In single mode
    the storage of the last row is returned (``None`` without rows),
    otherwise a dict of ``thing_id -> storage``.
    """
    table = types_id[type_id].thing_table
    rows, single = fetch_query(table, table.c.thing_id, thing_id)

    result = None if single else {}
    for row in rows:
        fields = dict(ups=row.ups,
                      downs=row.downs,
                      date=row.date,
                      deleted=row.deleted,
                      spam=row.spam)

        # only these two types have the extra karma column read here
        karma_type_ids = (types_name["link"].type_id,
                          types_name["comment"].type_id)
        if type_id in karma_type_ids:
            fields['descendant_karma'] = row.descendant_karma

        entry = storage(**fields)

        if not single:
            result[row.thing_id] = entry
        else:
            result = entry
    return result
Example #22
0
def build_thing_tables():
    """Create and register the tables behind every thing type.

    Walks ``dbm.things()``.  Per type it records the thing engine in
    ``thing_engines``, creates the thing table and the data table, and
    selects the thing table that accompanies the data table: the main one
    if the engines are equal, else a table on the data metadata (created
    and added to ``extra_thing_tables`` when ``dbm.extra_data`` has an
    entry for the data engine).  The result is stored in ``types_id`` and
    ``types_name``.
    """
    def make(table, kind):
        # hand a table to create_table together with its index commands
        create_table(table, index_commands(table, kind))

    for name, thing_engine, data_engine in dbm.things():
        type_id = check_type(type_table,
                             type_table.c.name == name,
                             dict(name=name))

        thing_engines[name] = thing_engine

        # thing table
        thing_table = get_thing_table(make_metadata(thing_engine), name)
        make(thing_table, 'thing')

        # data table
        data_metadata = make_metadata(data_engine)
        data_table = get_data_table(data_metadata, name)
        make(data_table, 'data')

        # is another thing table needed next to the data table?
        if thing_engine == data_engine:
            data_thing_table = thing_table
        elif dbm.extra_data.get(data_engine):
            data_thing_table = get_thing_table(data_metadata, 'data_' + name)
            extra_thing_tables.setdefault(type_id, set()).add(data_thing_table)
            make(data_thing_table, 'thing')
        else:
            data_thing_table = get_thing_table(data_metadata, name)

        type_info = storage(type_id=type_id,
                            name=name,
                            thing_table=thing_table,
                            data_table=(data_table, data_thing_table))

        types_id[type_id] = type_info
        types_name[name] = type_info
Example #23
0
def get_thing(type_id, thing_id):
    """Look up one thing, or several, by ``thing_id``.

    Queries the first element of ``get_thing_table(type_id)``.  Returns a
    storage (``None`` if nothing matched) when ``fetch_query`` flags the
    query as single, else a dict keyed by ``row.thing_id``.  Every storage
    contains ``ups``, ``downs``, ``date``, ``deleted`` and ``spam``.

    :raises ValueError: in single mode, when a row comes back for a
        ``thing_id`` other than the one requested.
    """
    table = get_thing_table(type_id)[0]
    r, single = fetch_query(table, table.c.thing_id, thing_id)

    #if single, only return one storage, otherwise make a dict
    res = {} if not single else None
    for row in r:
        stor = storage(ups=row.ups,
                       downs=row.downs,
                       date=row.date,
                       deleted=row.deleted,
                       spam=row.spam)
        if single:
            res = stor
            # check that we got what we asked for
            if row.thing_id != thing_id:
                # exception is instantiated explicitly: the comma form of
                # raise only parses on Python 2
                raise ValueError(
                    "tdb_sql.py: there's shit in the plumbing." +
                    " got %s, wanted %s" % (row.thing_id, thing_id))
        else:
            res[row.thing_id] = stor
    return res
Example #24
0
from r2.lib.utils import storage, storify, iters, Results, tup, TransSet
from r2.config.databases import dbm, tz
from pylons import g

import operators
import sqlalchemy as sa
from sqlalchemy.databases import postgres
from datetime import datetime
import cPickle as pickle

from copy import deepcopy

import logging
# Formatter that prefixes every log message with 'sql: '.
log_format = logging.Formatter('sql: %(message)s')

# Module-level settings container.
settings = storage()
settings.DEBUG = g.debug  # taken from the pylons global `g`
settings.DB_CREATE_TABLES = True
settings.DB_APP_NAME = 'reddit'

# NOTE(review): presumably the maximum length of a stored value -- its use
# is not visible here, confirm against the code that reads it.
max_val_len = 1000

# Module-level TransSet instance (see r2.lib.utils for its semantics).
transactions = TransSet()

# Short alias for the PostgreSQL dialect's big-integer type.
BigInteger = postgres.PGBigInteger

def alias_generator():
    """Yield an endless sequence of names: 'alias_1', 'alias_2', ..."""
    counter = 0
    while True:
        counter += 1
        yield 'alias_%d' % counter
Example #25
0
from r2.lib.utils import storage, storify, iters, Results, tup, TransSet
from r2.config.databases import dbm, tz
from pylons import g

import operators
import sqlalchemy as sa
from sqlalchemy.databases import postgres
from datetime import datetime
import cPickle as pickle

from copy import deepcopy

import logging
# Log formatter: each message is rendered as 'sql: <message>'.
log_format = logging.Formatter('sql: %(message)s')

# Settings shared across this module.
settings = storage()
settings.DEBUG = g.debug  # copied from the pylons global `g`
settings.DB_CREATE_TABLES = True
settings.DB_APP_NAME = 'reddit'

# NOTE(review): looks like an upper bound on value length -- the consumer
# is outside this view, so verify before relying on that reading.
max_val_len = 1000

# One TransSet instance for the whole module (defined in r2.lib.utils).
transactions = TransSet()

# Alias for the big-integer type of the PostgreSQL dialect.
BigInteger = postgres.PGBigInteger

def alias_generator():
    """Generate 'alias_1', 'alias_2', 'alias_3', ... without end."""
    index = 1
    while True:
        alias = 'alias_%d' % index
        index += 1
        yield alias