class KVStore(KVStoreBase):
    """Key-value store backend that keeps its entries in a DynamoDB table.

    Every entry is one table item with two attributes, ``key`` and ``value``.
    Region, credentials and table name are read from ``settings``.
    """

    def __init__(self):
        """Open a DynamoDB connection and bind ``self.table`` to the table."""
        super(KVStore, self).__init__()
        conn = boto.dynamodb2.connect_to_region(
            settings.AWS_REGION_NAME,
            aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
        )
        self.table = Table(settings.THUMBNAIL_DYNAMODB_NAME, connection=conn)

    def _get_raw(self, key):
        """Return the ``value`` stored under *key*, or ``None`` if absent."""
        try:
            return self.table.get_item(key=key)["value"]
        except boto.dynamodb2.exceptions.ItemNotFound:
            # A missing item is reported to the caller as None, not an error.
            return None

    def _set_raw(self, key, value):
        """Store *value* under *key*, creating the item if it does not exist."""
        try:
            item = self.table.get_item(key=key)
        except boto.dynamodb2.exceptions.ItemNotFound:
            item = self.table.new_item()
            item["key"] = key
        item["value"] = value
        item.save(overwrite=True)

    def _delete_raw(self, *keys):
        """Delete the item for each of *keys*, one request per key."""
        # Plain loop: the calls are made for their side effect only, so there
        # is no reason to collect their results into a list.
        for k in keys:
            self.table.delete_item(key=k)

    def _find_keys_raw(self, prefix):
        """Return the keys of all items whose ``key`` starts with *prefix*.

        Implemented with ``Table.scan`` and a ``beginswith`` filter on ``key``.
        """
        return [i["key"] for i in self.table.scan(key__beginswith=prefix)]
class KVStore(KVStoreBase):
    """DynamoDB-backed key-value store.

    Entries live in a single table as items carrying a ``key`` attribute and a
    ``value`` attribute. Connection parameters and the table name come from
    ``settings``.
    """

    def __init__(self):
        """Connect to the configured region and keep a handle on the table."""
        super(KVStore, self).__init__()
        conn = boto.dynamodb2.connect_to_region(
            settings.AWS_REGION_NAME,
            aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
        )
        self.table = Table(settings.THUMBNAIL_DYNAMODB_NAME, connection=conn)

    def _get_raw(self, key):
        """Look up *key* and return its ``value``; return ``None`` on a miss."""
        try:
            return self.table.get_item(key=key)['value']
        except boto.dynamodb2.exceptions.ItemNotFound:
            # Absence of the item is a normal outcome here, expressed as None.
            return None

    def _set_raw(self, key, value):
        """Write *value* for *key*, inserting a new item when none exists."""
        try:
            item = self.table.get_item(key=key)
        except boto.dynamodb2.exceptions.ItemNotFound:
            item = self.table.new_item()
            item['key'] = key
        item['value'] = value
        item.save(overwrite=True)

    def _delete_raw(self, *keys):
        """Remove the items identified by *keys* (one delete call per key)."""
        # A for-loop rather than a comprehension: only the side effect of
        # each delete call matters, not a list of return values.
        for k in keys:
            self.table.delete_item(key=k)

    def _find_keys_raw(self, prefix):
        """Return every stored key that begins with *prefix*.

        Uses ``Table.scan`` with a ``beginswith`` filter on the ``key``
        attribute.
        """
        return [i['key'] for i in self.table.scan(key__beginswith=prefix)]
def _cookie_to_uid(cookie):
    """Convert a base64 cookie into its 32-hex-digit user id.

    Spaces in *cookie* are replaced with ``+`` and ``==`` is appended when the
    cookie does not already end with it. The decoded bytes are unpacked as
    four little-endian unsigned 32-bit integers and rendered as upper-case hex.

    Raises:
        TypeError, ValueError, struct.error: the cookie is not valid base64 or
            does not decode to exactly 16 bytes.
    """
    if " " in cookie:
        cookie = cookie.replace(" ", "+")
    if not cookie.endswith("=="):
        cookie = cookie + "=="
    words = struct.unpack("<IIII", base64.b64decode(cookie))
    return "%08X%08X%08X%08X" % words


def _put_last_write(table, message, **extra):
    """Print and upsert the ``LAST_WRITE`` status item.

    *message* becomes the item's ``doAttr``; any *extra* keyword arguments are
    stored as additional attributes. ``overwrite=True`` is required because
    the same key is rewritten on every checkpoint and every run.
    """
    data = {
        'key': 'LAST_WRITE',
        'dtAttr': 'java.lang.String',
        'doAttr': message,
    }
    data.update(extra)
    print(data)
    table.put_item(data=data, overwrite=True)


def main():
    """Merge segment lists from a tab-separated file into the ``users1`` table.

    The file named by ``sys.argv[1]`` holds one ``cookie<TAB>seg1,seg2,...``
    record per line. For each record the cookie is decoded to a user id, every
    segment is tagged ``:tp:1``, and the result is unioned with the segments
    already stored for that user (with ``:fp:`` rewritten to ``:tp:``). Users
    whose stored set already contains every incoming segment are skipped;
    the rest are written through a batch writer that is flushed every
    ``BATCH_SIZE`` puts. A ``LAST_WRITE`` status item is updated every 5000
    written users and once more at the end.

    Lines that do not split into exactly two tab-separated fields, or whose
    cookie cannot be decoded, are counted as errors and skipped.
    """
    # NOTE: print() is used with a single argument throughout so the output is
    # the same under the Python 2 print statement.
    aud = sys.argv[1]
    region = 'us-east-1'
    print('Connecting to %s with IAM role' % (region))
    table = Table('users1')

    counts_fmt = "User count: %s total, updated %s, same %s, new %s, error %s"
    skipped = 0
    newcnt = 0
    updatedcnt = 0
    samecnt = 0
    cnt = 0
    errcnt = 0
    batch = None
    last_written = None  # data of the most recent item handed to the batch

    with open(aud) as f:
        for line in f:
            if batch is None:
                batch = table.batch_write()
                print("Got batch %s" % batch)

            # Drop the line terminator first; otherwise it ends up embedded in
            # the last segment name ("seg\n:tp:1").
            fields = line.rstrip("\r\n").split("\t")
            if len(fields) != 2:
                errcnt += 1
                continue
            cookie, segs = fields

            try:
                uid = _cookie_to_uid(cookie)
            except (TypeError, ValueError, struct.error):
                errcnt += 1
                continue

            incoming = set('%s:tp:1' % seg for seg in segs.split(","))

            try:
                item = table.get_item(key=uid)
            except boto.dynamodb2.exceptions.ItemNotFound:
                stored = []
            else:
                stored = simplejson.loads(item['doAttr']) or []
            if not stored:
                # Missing item and empty stored list both count as a new user.
                newcnt += 1

            existing = set(s.replace(':fp:', ':tp:').strip() for s in stored)
            merged = incoming | existing

            # Nothing new for this user: no need to write.
            if merged == existing:
                samecnt += 1
                skipped += 1
                continue
            if existing:
                updatedcnt += 1

            last_written = {
                'doAttr': simplejson.dumps(list(merged)),
                'dtAttr': 'java.util.Set',
                'key': uid,
            }
            batch.put_item(data=last_written)
            cnt += 1

            if cnt % BATCH_SIZE == 0:
                batch.flush()
                batch = None

            if cnt % 5000 == 0:
                counts = counts_fmt % (
                    cnt, updatedcnt, samecnt, newcnt, errcnt)
                print("OK")
                print(last_written)
                print(counts)
                print("Wrote %s users" % cnt)
                _put_last_write(
                    table,
                    "LOTAME: %s at %s\nLast user: %s : %s" % (
                        counts, datetime.datetime.now(),
                        uid, str(last_written)))

    # The batch is None when the last put landed exactly on a BATCH_SIZE
    # boundary or when the input produced no iterations at all.
    if batch is not None:
        batch.flush()

    _put_last_write(
        table,
        "LOTAME: %s at %s" % (
            counts_fmt % (cnt, updatedcnt, samecnt, newcnt, errcnt),
            datetime.datetime.now()),
        # Stored as text; 'None' when no user was written in this run.
        item=str(last_written))

    print("Added or updated %s users, skipped %s, to %s region" % (
        cnt, skipped, region))