def _get_scheduled_jobs(self, dynamodb_connection):  # noqa
    """
    WARNING -- this method requires cleanup; the user must remember to
    delete the table once complete.  For example:

    >>> NEW_JOB = {'log_version': 'ad_click', 'log_schema_version': '1'}
    >>> def cool_test_fn(dynamodb_connection):
    >>>     tsj = TestScheduledJobs()
    >>>     table, scheduled_jobs = tsj._get_scheduled_jobs(dynamodb_connection)
    >>>     assert scheduled_jobs.put(**NEW_JOB)
    >>>     yield scheduled_jobs
    >>>     assert table.delete()  # THIS IS THE KEY CLEANUP!!
    """
    avro_schema = get_avro_schema('mycroft/avro/scheduled_jobs.json')
    index_load_status = GlobalAllIndex(
        ScheduledJobs.INDEX_LOAD_STATUS,
        parts=[HashKey('load_status')])
    index_et_status = GlobalAllIndex(
        ScheduledJobs.INDEX_ET_STATUS,
        parts=[HashKey('et_status')])
    index_log_name_and_log_schema_version = GlobalAllIndex(
        ScheduledJobs.INDEX_LOG_NAME_AND_LOG_SCHEMA_VERSION,
        parts=[HashKey('log_name'), RangeKey('log_schema_version')])
    table = Table.create(
        'ScheduledJobs',
        schema=NAME_TO_SCHEMA['scheduled_jobs'],
        connection=dynamodb_connection,
        global_indexes=[index_et_status, index_load_status,
                        index_log_name_and_log_schema_version])
    return table, ScheduledJobs(persistence_object=table,
                                avro_schema_object=avro_schema)
def createGamesTable(db):
    try:
        hostStatusDate = GlobalAllIndex("hostStatusDate",
                                        parts=[HashKey("HostId"),
                                               RangeKey("StatusDate")])
        opponentStatusDate = GlobalAllIndex("opponentStatusDate",
                                            parts=[HashKey("OpponentId"),
                                                   RangeKey("StatusDate")])
        # global secondary indexes
        GSI = [hostStatusDate, opponentStatusDate]
        gamesTable = Table.create("Games",
                                  schema=[HashKey("GameId")],
                                  throughput={'read': 1, 'write': 1},
                                  global_indexes=GSI,
                                  connection=db)
    except JSONResponseError as jre:
        try:
            gamesTable = Table("Games", connection=db)
        except Exception as e:
            print "Games Table doesn't exist."
def create(self):
    Table.create(self.table_name,
                 schema=[
                     HashKey(Tweet.tweet_user_id),
                     RangeKey(Tweet.tweet_id),
                 ],
                 throughput=standard_throughput,
                 indexes=[
                     AllIndex(self.index_timestamp, parts=[
                         HashKey(Tweet.tweet_user_id),
                         RangeKey(Tweet.ts_ms)
                     ])
                 ],
                 global_indexes=[
                     GlobalAllIndex(self.index_site, parts=[
                         HashKey(keys.entity_site),
                         RangeKey(Tweet.tweet_id)
                     ], throughput=standard_throughput),
                     GlobalAllIndex(self.index_league, parts=[
                         HashKey(keys.entity_league),
                         RangeKey(Tweet.tweet_id)
                     ], throughput=standard_throughput),
                     GlobalAllIndex(self.index_team, parts=[
                         HashKey(keys.entity_team),
                         RangeKey(Tweet.tweet_id)
                     ], throughput=standard_throughput)
                 ])
def create(self):
    Table.create(self.table_name,
                 schema=[
                     HashKey(keys.entity_profile),
                     RangeKey(time_keys.ts_add, data_type=NUMBER)
                 ],
                 throughput=standard_throughput,
                 indexes=[
                     AllIndex(self.index_delta, parts=[
                         HashKey(keys.entity_profile),
                         RangeKey(time_keys.ts_delta, data_type=NUMBER)
                     ]),
                     AllIndex(self.index_cut, parts=[
                         HashKey(keys.entity_profile),
                         RangeKey(time_keys.ts_cut, data_type=NUMBER)
                     ])
                 ],
                 global_indexes=[
                     GlobalAllIndex(self.index_team, parts=[
                         HashKey(keys.entity_team),
                         RangeKey(time_keys.ts_add, data_type=NUMBER)
                     ], throughput=standard_throughput),
                     GlobalAllIndex(self.index_league, parts=[
                         HashKey(keys.entity_league),
                         RangeKey(time_keys.ts_add, data_type=NUMBER)
                     ], throughput=standard_throughput),
                     GlobalAllIndex(self.index_twitter, parts=[
                         HashKey(keys.entity_twitter),
                         RangeKey(time_keys.ts_add, data_type=NUMBER)
                     ], throughput=standard_throughput),
                     GlobalAllIndex(self.index_site, parts=[
                         HashKey(keys.entity_site),
                         RangeKey(time_keys.ts_add, data_type=NUMBER)
                     ], throughput=standard_throughput)
                 ])
    print 'creating entity history table'
class RacingCrewCollection(DBTable):
    table_name = 'racing_crew'
    item_class = Race
    global_indexes = [
        GlobalAllIndex('race-index', parts=[HashKey('race')]),
        GlobalAllIndex('event-index', parts=[HashKey('event')])
    ]

    def __init__(self, dbconn):
        super(RacingCrewCollection, self).__init__(RacingCrewCollection.table_name,
                                                   DBTable.simple_schema,
                                                   RacingCrewCollection.global_indexes,
                                                   dbconn)
def clear_db():
    global org_table
    global event_table
    global regatta_table

    # model.OrganizationCollection(conn).delete()
    org_table.delete()
    # model.EventCollection(conn).delete()
    event_table.delete()
    # model.RegattaCollection(conn).delete()
    regatta_table.delete()

    app.logger.info("Sleeping for 60 seconds to allow tables to delete")
    time.sleep(60)

    app.logger.debug("Creating organization table")
    org_table = Table.create(
        application.config['ORG_TABLE'],
        throughput={'read': 1, 'write': 1},
        connection=conn.conn,
        schema=[HashKey('_id')],
        global_indexes=[GlobalAllIndex('name-index', parts=[HashKey('name')])])

    app.logger.debug("Creating events table")
    event_table = Table.create(
        application.config['EVENT_TABLE'],
        throughput={'read': 1, 'write': 1},
        connection=conn.conn,
        schema=[HashKey('_id')],
        global_indexes=[GlobalAllIndex('event-index', parts=[HashKey('event')])])

    app.logger.debug("Creating regatta table")
    regatta_table = Table.create(
        application.config['REGATTA_TABLE'],
        throughput={'read': 1, 'write': 1},
        connection=conn.conn,
        schema=[HashKey('_id')],
        global_indexes=[GlobalAllIndex('name-index', parts=[HashKey('name')])])

    app.logger.info("Sleeping for 60 seconds to allow tables to create")
    time.sleep(60)
def tableCreateKwargs(self): return dict( schema=[ HashKey("instance_id"), RangeKey("date_hour"), ], throughput={ "read": (taurus.engine.config.getint( "dynamodb", "instance_data_hourly_throughput_read")), "write": (taurus.engine.config.getint( "dynamodb", "instance_data_hourly_throughput_write")) }, global_indexes=[ GlobalAllIndex("taurus.instance_data_hourly-date_hour_index", parts=[HashKey("date"), RangeKey("hour")], throughput={ "read": taurus.engine.config.getint( "dynamodb", "instance_data_hourly_throughput_read"), "write": taurus.engine.config.getint( "dynamodb", "instance_data_hourly_throughput_write") }) ])
def createDynamoObject():
    try:
        users = Table.create(
            'data',
            schema=[HashKey('id')],
            global_indexes=[
                GlobalAllIndex('EverythingIndex', parts=[HashKey('name')])
            ],
            connection=boto.dynamodb2.connect_to_region('us-west-2'))
    except boto.exception.JSONResponseError:
        users = Table('data',
                      connection=boto.dynamodb2.connect_to_region('us-west-2'))
        print "1) Table 'data' already created."

    # On first run this won't insert data because of the delay while AWS
    # creates the table on the server side.
    try:
        users.put_item(
            data={
                'id': '3',
                'type': 'person',
                'name': 'dummy',
                'activities': ['activity one'],
            })
    except Exception:
        print "2) Dummy data already added."
    return users
def _extract_index(index_data, global_index=False):
    '''
    Instantiates and returns an AllIndex or GlobalAllIndex object given a
    valid index configuration
    '''
    parsed_data = {}
    keys = []

    for key, value in six.iteritems(index_data):
        for item in value:
            for field, data in six.iteritems(item):
                if field == 'hash_key':
                    parsed_data['hash_key'] = data
                elif field == 'hash_key_data_type':
                    parsed_data['hash_key_data_type'] = data
                elif field == 'range_key':
                    parsed_data['range_key'] = data
                elif field == 'range_key_data_type':
                    parsed_data['range_key_data_type'] = data
                elif field == 'name':
                    parsed_data['name'] = data
                elif field == 'read_capacity_units':
                    parsed_data['read_capacity_units'] = data
                elif field == 'write_capacity_units':
                    parsed_data['write_capacity_units'] = data

    if parsed_data.get('hash_key'):
        keys.append(
            HashKey(
                parsed_data['hash_key'],
                data_type=parsed_data['hash_key_data_type']
            )
        )
    if parsed_data.get('range_key'):
        keys.append(
            RangeKey(
                parsed_data['range_key'],
                data_type=parsed_data['range_key_data_type']
            )
        )
    if (global_index and
            parsed_data.get('read_capacity_units') and
            parsed_data.get('write_capacity_units')):
        parsed_data['throughput'] = {
            'read': parsed_data['read_capacity_units'],
            'write': parsed_data['write_capacity_units']
        }
    if parsed_data.get('name') and len(keys) > 0:
        if global_index:
            return GlobalAllIndex(
                parsed_data['name'],
                parts=keys,
                throughput=parsed_data.get('throughput')
            )
        else:
            return AllIndex(
                parsed_data['name'],
                parts=keys
            )
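Judging from the nested loops above, index_data is a mapping whose values are lists of single-key dicts (the shape a YAML config typically parses into). A hedged example of input this parser would accept; the index and attribute names are illustrative only, not from the original.

# Illustrative input only; 'name-index', 'name', and 'created_at' are made-up values.
sample_index_data = {
    'name-index': [
        {'name': 'name-index'},
        {'hash_key': 'name'},
        {'hash_key_data_type': 'S'},
        {'range_key': 'created_at'},
        {'range_key_data_type': 'N'},
        {'read_capacity_units': 1},
        {'write_capacity_units': 1},
    ]
}
gsi = _extract_index(sample_index_data, global_index=True)   # -> GlobalAllIndex
lsi = _extract_index(sample_index_data)                      # -> AllIndex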
def createDynamoObject(name):
    try:
        users = Table.create(name,
                             schema=[HashKey('id')],
                             throughput={'read': db_read_cap,
                                         'write': db_write_cap},
                             global_indexes=[
                                 GlobalAllIndex('EverythingIndex',
                                                parts=[HashKey('name')])
                             ],
                             connection=boto.dynamodb2.connect_to_region(AWS_REGION))
    except boto.exception.JSONResponseError:
        users = Table(name,
                      connection=boto.dynamodb2.connect_to_region(AWS_REGION))
        print "1) Table already created: " + name

    # On first run this won't insert data because of the delay while AWS
    # creates the table on the server side.
    try:
        users.put_item(data={
            'id': '3',
            'type': 'person',
            'name': 'dummy',
            'activities': ['activity one'],
        })
    except Exception:
        print "2) Dummy data already added for table: " + name
    return users
def createTable(isLocal, localPort):
    """Used to create the table for DynamoDB."""
    SessionTable.LOCAL_PORT = localPort
    secondaryIndex = [
        GlobalAllIndex('expiration-index',
                       parts=[HashKey('expiration', data_type=NUMBER)],
                       throughput={'read': 5, 'write': 5})
    ]
    if isLocal:
        try:
            Table.create(SessionTable.TABLE_NAME,
                         schema=[HashKey(SessionTable.KEY_NAME)],
                         global_indexes=secondaryIndex,
                         connection=SessionTable.getLocalConnection())
        except exceptions.JSONResponseError as jre:
            if jre.status == 400 and "preexisting" in jre.message.lower():
                # table already exists
                pass
    else:
        Table.create(SessionTable.TABLE_NAME,
                     schema=[HashKey(SessionTable.KEY_NAME)],
                     global_indexes=secondaryIndex)
def get_indexes(all_indexes):
    indexes = []
    global_indexes = []
    for index in all_indexes:
        name = index['name']
        schema = get_schema_param(index.get('hash_key_name'),
                                  index.get('hash_key_type'),
                                  index.get('range_key_name'),
                                  index.get('range_key_type'))
        throughput = {
            'read': index.get('read_capacity', 1),
            'write': index.get('write_capacity', 1)
        }

        if index['type'] == 'all':
            indexes.append(AllIndex(name, parts=schema))
        elif index['type'] == 'global_all':
            global_indexes.append(GlobalAllIndex(name,
                                                 parts=schema,
                                                 throughput=throughput))
        elif index['type'] == 'global_include':
            global_indexes.append(GlobalIncludeIndex(name,
                                                     parts=schema,
                                                     throughput=throughput,
                                                     includes=index['includes']))
        elif index['type'] == 'global_keys_only':
            global_indexes.append(GlobalKeysOnlyIndex(name,
                                                      parts=schema,
                                                      throughput=throughput))
        elif index['type'] == 'include':
            indexes.append(IncludeIndex(name,
                                        parts=schema,
                                        includes=index['includes']))
        elif index['type'] == 'keys_only':
            indexes.append(KeysOnlyIndex(name, parts=schema))

    return indexes, global_indexes
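A hedged example of the all_indexes structure this function expects, reconstructed from the fields it reads; only the dict keys come from the code above, while the index names, key types, and capacities are placeholders.

# Placeholder values throughout; only the dict keys are taken from the code above.
sample_indexes = [
    {
        'name': 'NameIndex',
        'type': 'global_all',
        'hash_key_name': 'name',
        'hash_key_type': 'STRING',
        'read_capacity': 2,
        'write_capacity': 2,
    },
    {
        'name': 'DateIndex',
        'type': 'include',
        'hash_key_name': 'id',
        'hash_key_type': 'STRING',
        'range_key_name': 'created_at',
        'range_key_type': 'NUMBER',
        'includes': ['title', 'status'],
    },
]
indexes, global_indexes = get_indexes(sample_indexes)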
def createFreshTable(self):
    """
    Create a fresh empty distance table.
    """
    # delete existing table if it exists
    try:
        self.__getTable().delete()
        time.sleep(10)
    except:
        pass  # do nothing. Maybe there was no existing table

    # create new table
    tableConnectionParams = parseConnectionString(self.tableConnString)
    return Table.create(tableConnectionParams['name'],
                        schema=[HashKey('from'), RangeKey('to')],
                        throughput={
                            'read': 1,
                            'write': 2,
                        },
                        global_indexes=[
                            GlobalAllIndex('reverseIndex',
                                           parts=[HashKey('to'),
                                                  RangeKey('from')],
                                           throughput={
                                               'read': 1,
                                               'write': 2,
                                           })
                        ],
                        connection=getDbConnection(tableConnectionParams))
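A sketch, not taken from the original module, of how the reverseIndex created above might be queried with boto2's query_2; the instance name and the 'distance' attribute are assumptions.

# Sketch only: 'distance_store' stands in for an instance of the class above,
# and 'distance' is a made-up payload attribute.
table = distance_store.createFreshTable()
table.put_item(data={'from': 'A', 'to': 'B', 'distance': 12})

# Look up rows by destination through the global secondary index.
for row in table.query_2(to__eq='B', index='reverseIndex'):
    print row['from'], row['to']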
class StageCollection(DBTable):
    table_name = 'stage'
    global_indexes = [GlobalAllIndex('event-index', parts=[HashKey('event')])]

    def __init__(self, dbconn):
        super(StageCollection, self).__init__(StageCollection.table_name,
                                              DBTable.simple_schema,
                                              StageCollection.global_indexes,
                                              dbconn)
class EventCollection(DBTable):
    table_name = 'events'
    global_indexes = [GlobalAllIndex('event-index', parts=[HashKey('event')])]

    def __init__(self, dbconn):
        super(EventCollection, self).__init__(EventCollection.table_name,
                                              DBTable.simple_schema,
                                              None,
                                              dbconn)
        self.schema = DBTable.simple_schema
class OrganizationCollection(DBTable):
    table_name = 'organizations'
    global_indexes = [GlobalAllIndex('NameIndex', parts=[HashKey('name')])]

    def __init__(self, dbconn):
        super(OrganizationCollection, self).__init__(OrganizationCollection.table_name,
                                                     DBTable.simple_schema,
                                                     OrganizationCollection.global_indexes,
                                                     dbconn)
def test_query_with_global_indexes():
    table = Table.create('messages', schema=[
        HashKey('subject'),
        RangeKey('version'),
    ], global_indexes=[
        GlobalAllIndex('topic-created_at-index',
                       parts=[
                           HashKey('topic'),
                           RangeKey('created_at', data_type='N')
                       ],
                       throughput={
                           'read': 6,
                           'write': 1
                       }),
        GlobalAllIndex('status-created_at-index',
                       parts=[
                           HashKey('status'),
                           RangeKey('created_at', data_type='N')
                       ],
                       throughput={
                           'read': 2,
                           'write': 1
                       })
    ])

    item_data = {
        'subject': 'Check this out!',
        'version': '1',
        'created_at': 0,
        'status': 'inactive'
    }
    item = Item(table, item_data)
    item.save(overwrite=True)

    item['version'] = '2'
    item.save(overwrite=True)

    results = table.query(status__eq='active')
    list(results).should.have.length_of(0)
class RegattaCollection(DBTable):
    table_name = 'regatta'
    # item_class = Regatta
    global_indexes = [GlobalAllIndex('name-index', parts=[HashKey('name')])]

    def __init__(self, dbconn):
        super(RegattaCollection, self).__init__(RegattaCollection.table_name,
                                                DBTable.simple_schema,
                                                RegattaCollection.global_indexes,
                                                dbconn)
def get_table():
    return Table(table_name,
                 schema=[
                     HashKey('CreatedAt'),
                     RangeKey('Count'),
                 ],
                 global_indexes=[
                     GlobalAllIndex('CountsIndex', parts=[
                         HashKey('EventType', data_type=STRING),
                         RangeKey('Timestamp', data_type=STRING)
                     ])
                 ])
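Note that constructing Table(...) only wraps a table that already exists; a sketch of the matching Table.create call if the table still needs to be created (the throughput numbers are placeholders, not from the original snippet).

# Sketch only; throughput values are placeholders.
def create_table():
    return Table.create(table_name,
                        schema=[
                            HashKey('CreatedAt'),
                            RangeKey('Count'),
                        ],
                        throughput={'read': 1, 'write': 1},
                        global_indexes=[
                            GlobalAllIndex('CountsIndex', parts=[
                                HashKey('EventType', data_type=STRING),
                                RangeKey('Timestamp', data_type=STRING)
                            ])
                        ])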
class AthleteCollection(DBTable):
    table_name = 'athletes'
    global_indexes = [
        GlobalAllIndex('OrgIndex', parts=[HashKey('organization')])
    ]

    def __init__(self, dbconn):
        super(AthleteCollection, self).__init__(AthleteCollection.table_name,
                                                DBTable.simple_schema,
                                                AthleteCollection.global_indexes,
                                                dbconn)
def __getMappingsTable(self):
    return getDbTableWithSchemaAndGlobalIndexes(
        self.docClusterMappingTable,
        [
            HashKey('clusterId'),
            RangeKey('docId')
        ],
        [
            GlobalAllIndex('docId-clusterId-index', parts=[
                HashKey('docId'),
                RangeKey('clusterId')
            ])
        ]
    )
class Audit(DBTable):
    table_name = 'audit'
    schema = [HashKey('timeStamp'), RangeKey('user')]
    global_indexes = [GlobalAllIndex('race-index', parts=[HashKey('user')])]

    def __init__(self, dbconn):
        self.logger = logging.getLogger('audit')
        super(Audit, self).__init__(Audit.table_name,
                                    Audit.schema,
                                    Audit.global_indexes,
                                    dbconn)

    def info(self, message):
        self.logger.info(message)
        self.insert(data={'message': message, 'timeStamp': time.time()})
def sendtodynamo_cnn(cnnjson):
    '''
    Send json to DynamoDB
    Assumes that article timestamps have been deduped to avoid collisions
    '''
    conn = connect_to_region('us-west-2',
                             aws_access_key_id=AWS_ACCESS_KEY_ID,
                             aws_secret_access_key=AWS_SECRET_ACCESS_KEY)

    hashkey = "CNN"  # primary key to be used for DynamoDB table

    try:
        table = Table('CNN', connection=conn)
        table.describe()
    except boto.exception.JSONResponseError:
        print "Creating table"
        table = Table.create(
            'CNN',
            schema=[HashKey('source'), RangeKey('tstamp', data_type=NUMBER)],
            throughput={'read': 25, 'write': 25},
            global_indexes=[
                GlobalAllIndex('showidx',
                               parts=[HashKey('show')],
                               throughput={'read': 10, 'write': 5})
            ])

    iteration = 0
    for article in cnnjson:
        # Iterate through list of articles and upload to table
        rangekey = float(article['timestamp'])
        rowdata = {
            'source': hashkey,
            'tstamp': rangekey,
            'cnnShow': article['show']
        }
        for key in article.keys():
            rowdata[key] = article[key]
        item = table.put_item(data=rowdata)
        iteration += 1
        if iteration % 100 == 0:
            print "Uploaded " + str(iteration) + " articles"
    return None
def etl_records(dynamodb_connection):
    avro_schema = get_avro_schema('mycroft/avro/etl_record.json')
    index_job_id = GlobalAllIndex(
        ETLRecords.INDEX_JOB_ID_AND_DATA_DATE,
        parts=[HashKey('job_id'), RangeKey('data_date')])
    table = Table.create('ETLRecords',
                         schema=NAME_TO_SCHEMA['etl_records'],
                         connection=dynamodb_connection,
                         global_indexes=[index_job_id])
    etl_records = ETLRecords(persistence_object=table,
                             avro_schema_object=avro_schema)
    for etl_record in SAMPLE_RECORD_JOBS:
        assert etl_records.put(**etl_record)
    yield etl_records
    assert table.delete()
def __getTable(self):
    """
    Get the clusters table.
    """
    return getDbTableWithSchemaAndGlobalIndexes(
        self.tableConnString,
        [
            HashKey('clusterId')
        ],
        [
            GlobalAllIndex('isCurrent-clusterId-index', parts=[
                HashKey('isCurrent'),
                RangeKey('clusterId')
            ])
        ]
    )
def test_create_with_global_indexes():
    conn = boto.dynamodb2.layer1.DynamoDBConnection()

    Table.create('messages', schema=[
        HashKey('subject'),
        RangeKey('version'),
    ], global_indexes=[
        GlobalAllIndex('topic-created_at-index',
                       parts=[
                           HashKey('topic'),
                           RangeKey('created_at', data_type='N')
                       ],
                       throughput={
                           'read': 6,
                           'write': 1
                       }),
    ])

    table_description = conn.describe_table("messages")
    table_description['Table']["GlobalSecondaryIndexes"].should.equal([
        {
            "IndexName": "topic-created_at-index",
            "KeySchema": [
                {
                    "AttributeName": "topic",
                    "KeyType": "HASH"
                },
                {
                    "AttributeName": "created_at",
                    "KeyType": "RANGE"
                },
            ],
            "Projection": {
                "ProjectionType": "ALL"
            },
            "ProvisionedThroughput": {
                "ReadCapacityUnits": 6,
                "WriteCapacityUnits": 1,
            }
        }
    ])
def createTable():
    consumer_complaint = Table.create(
        'consumer_complaint',
        schema=[
            HashKey('Complaint_ID'),  # defaults to STRING data_type
        ],
        throughput={
            'read': 5,
            'write': 15,
        },
        global_indexes=[
            GlobalAllIndex('EverythingIndex',
                           parts=[
                               HashKey('State'),
                           ],
                           throughput={
                               'read': 1,
                               'write': 1,
                           })
        ],
        # If you need to specify custom parameters, such as credentials or
        # region, use the following:
        connection=boto.dynamodb2.connect_to_region('us-west-2'))
    return consumer_complaint
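A short usage sketch for the table above: writing one item and querying the EverythingIndex GSI. The complaint values are placeholders, and the table must reach ACTIVE status before the write succeeds.

# Sketch only; the attribute values are made up.
table = createTable()
table.put_item(data={'Complaint_ID': '12345',
                     'State': 'CA',
                     'Product': 'Mortgage'})
for item in table.query_2(State__eq='CA', index='EverythingIndex'):
    print item['Complaint_ID']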
def __init__(self):
    super(self.__class__, self).__init__(
        'store',
        schema=[HashKey('store_id')],
        global_indexes=[
            GlobalAllIndex('StoreCompanyIndex',
                           parts=[
                               HashKey('company_id'),
                               RangeKey('store_id', data_type=STRING)
                           ],
                           throughput={'read': 3, 'write': 3})
        ],
        throughput={'read': 3, 'write': 3},
        record_class=Store)
def _mock_create_table(
        self,
        name,
        hash_key=MOCK_TABLE_HASH_KEY,
        range_key=MOCK_TABLE_RANGE_KEY,
        read_throughput=MOCK_TABLE_READ_THROUGHPUT,
        write_throughput=MOCK_TABLE_WRITE_THROUGHPUT,
        global_index_name=MOCK_TABLE_GLOBAL_INDEX_NAME,
        global_index_attr_name=MOCK_TABLE_GLOBAL_INDEX_ATTR_NAME):
    Table.create(name,
                 schema=[HashKey(hash_key), RangeKey(range_key)],
                 throughput={
                     'read': read_throughput,
                     'write': write_throughput
                 },
                 global_indexes=[
                     GlobalAllIndex(
                         global_index_name,
                         parts=[HashKey(global_index_attr_name)])
                 ])
def tableCreateKwargs(self): return dict( schema=[HashKey("metric_name_tweet_uid"), RangeKey("agg_ts")], throughput={ "read": 1, "write": taurus.engine.config.getint("dynamodb", "metric_tweets_throughput_write") }, global_indexes=[ GlobalAllIndex( "taurus.metric_data-metric_name_index", parts=[HashKey("metric_name"), RangeKey("agg_ts")], throughput={ "read": (taurus.engine.config.getint( "dynamodb", "metric_tweets_throughput_read")), "write": (taurus.engine.config.getint( "dynamodb", "metric_tweets_throughput_write")) }) ])