def _purge_expired(database, name, query):
    """Run *query* as Spanner partitioned DML on *database*.

    The call is timed under the ``syncstorage.purge_ttl.<name>_duration``
    statsd metric, and the affected row count plus elapsed time are logged.
    """
    with statsd.timer("syncstorage.purge_ttl.{}_duration".format(name)):
        start = datetime.now()
        result = database.execute_partitioned_dml(query)
        elapsed = datetime.now() - start
        logging.info("{}: removed {} rows, {}_duration: {}".format(
            name, result, name, elapsed))


def spanner_read_data(request=None):
    """Purge expired rows from the ``batches`` and ``bsos`` tables.

    The target Spanner instance/database are taken from the environment via
    ``from_env()``.  *request* is unused; it exists so the function can be
    wired up as a Cloud Function/HTTP handler entry point.
    """
    (instance_id, database_id) = from_env()
    instance = client.instance(instance_id)
    database = instance.database(database_id)
    logging.info("For {}:{}".format(instance_id, database_id))
    # Delete Batches. Also deletes child batch_bsos rows (INTERLEAVE
    # IN PARENT batches ON DELETE CASCADE)
    _purge_expired(database, "batches",
                   'DELETE FROM batches WHERE expiry < CURRENT_TIMESTAMP()')
    # Delete BSOs
    _purge_expired(database, "bso",
                   'DELETE FROM bsos WHERE expiry < CURRENT_TIMESTAMP()')
def ringo_conversation_tweet():
    """Tweet at one randomly chosen target user, at most once per day.

    A redis ``setnx`` key acts as a distributed once-per-day lock; if the key
    already exists the function returns without tweeting.  On any failure the
    lock is released so the next run can retry.
    """
    (cluster, cs) = get_cassandra()
    rds = get_redis()
    try:
        tw = get_twitter(cs)
        # Acquire the daily lock; bail out if another worker holds it.
        setted = rds.setnx('ringo-conversation-tweet', '1')
        if not setted:
            return
        rds.expire('ringo-conversation-tweet', 86400)
        targets = cs.execute("SELECT * FROM targets")
        target_user_ids = [row.user_id for row in targets]
        choice = random.choice(target_user_ids)
        observation = cs.execute(
            "SELECT * FROM users_observations WHERE user_id = %s", (choice,))
        if observation is None:
            return
        observation = list(observation)
        if len(observation) == 0:
            return
        # Fix: `observation` is already a list — the original re-wrapped it
        # in list() a second time for no reason.
        row = observation[0]
        profile = json.loads(row.blob)
        screen_name = profile['screen_name']
        status_text = "Hey @%s %s" % (screen_name, str(time.time()))
        with statsd.timer('twitter.req.statuses-update'):
            tw.update_status(status_text)
    except BaseException:
        # Was a bare `except:`; BaseException keeps the identical catch-all
        # semantics (incl. KeyboardInterrupt) while satisfying PEP 8.
        # Release the daily lock so a failed attempt can be retried, then
        # re-raise.
        rds.delete('ringo-conversation-tweet')
        raise
    finally:
        cluster.shutdown()
def deleter(database, name, query):
    """Execute *query* as partitioned DML against *database*.

    Timed under the ``syncstorage.purge_ttl.<name>_duration`` statsd metric;
    logs the query before running and the row count / duration afterwards.
    """
    metric = "syncstorage.purge_ttl.{}_duration".format(name)
    with statsd.timer(metric):
        logging.info("Running: {}".format(query))
        started = datetime.now()
        rows = database.execute_partitioned_dml(query)
        elapsed = datetime.now() - started
        logging.info(
            "{name}: removed {result} rows, {name}_duration: {time}".format(
                name=name, result=rows, time=elapsed))
def batch_user_profile(user_ids):
    """Fetch Twitter profiles for *user_ids* in a single lookup call and
    store each profile's raw JSON in the ``users_observations`` table.

    The Cassandra cluster connection is always shut down on exit.
    """
    (cluster, cs) = get_cassandra()
    try:
        tw = get_twitter(cs)
        with statsd.timer('twitter.req.users-lookup'):
            profiles = tw.lookup_users(user_ids)
        insert = cs.prepare(
            "INSERT INTO users_observations (user_id, blob) values (?, ?)")
        for prof in profiles:
            # _json is the raw profile payload as returned by the API.
            cs.execute(insert, (prof.id, json.dumps(prof._json)))
    finally:
        cluster.shutdown()
def spanner_read_data(query, table):
    """Count expired rows in *table* via *query* and report the result.

    Emits the duration under ``syncstorage.count_expired_<table>_rows.duration``,
    the count as the ``syncstorage.expired_<table>_rows`` gauge, and an info log.
    """
    instance_id, database_id = from_env()
    database = client.instance(instance_id).database(database_id)
    logging.info("For {}:{}".format(instance_id, database_id))
    # Count bsos expired rows
    with statsd.timer(f"syncstorage.count_expired_{table}_rows.duration"):
        with database.snapshot() as snapshot:
            rows = snapshot.execute_sql(query)
            row_count = rows.one()[0]
            statsd.gauge(f"syncstorage.expired_{table}_rows", row_count)
            logging.info(f"Found {row_count} expired rows in {table}")
def spanner_read_data(request=None):
    """Count distinct users (``fxa_uid``) in ``user_collections``.

    Publishes the count as the ``syncstorage.distinct_fxa_uid`` gauge and
    logs it; the query is timed under ``syncstorage.count_users.duration``.
    *request* is unused (HTTP-handler entry-point signature).
    """
    instance_id, database_id = from_env()
    database = client.instance(instance_id).database(database_id)
    logging.info("For {}:{}".format(instance_id, database_id))
    # Count users
    with statsd.timer("syncstorage.count_users.duration"):
        with database.snapshot() as snapshot:
            sql = 'SELECT COUNT (DISTINCT fxa_uid) FROM user_collections'
            rows = snapshot.execute_sql(sql)
            user_count = rows.one()[0]
            statsd.gauge("syncstorage.distinct_fxa_uid", user_count)
            logging.info("Count found {} distinct users".format(user_count))
def deleter(database: Database, name: str, query: str,
            prefix: Optional[str] = None,
            params: Optional[dict] = None,
            param_types: Optional[dict] = None,
            dryrun: bool = False):
    """Execute *query* as partitioned DML against *database*, unless *dryrun*.

    Args:
        database: Spanner database handle.
        name: Metric/log label; timing goes to
            ``syncstorage.purge_ttl.<name>_duration``.
        query: The DML statement to execute.
        prefix: Label included in the completion log line only.
        params: Optional bound query parameters.
        param_types: Optional Spanner types for *params*.
        dryrun: If True, skip execution and report 0 rows removed.
            (Fix: annotation was ``Optional[bool]`` although the value is a
            plain flag that is never ``None``.)
    """
    with statsd.timer("syncstorage.purge_ttl.{}_duration".format(name)):
        logging.info("Running: {} :: {}".format(query, params))
        start = datetime.now()
        result = 0
        if not dryrun:
            result = database.execute_partitioned_dml(
                query, params=params, param_types=param_types)
        end = datetime.now()
        logging.info(
            "{name}: removed {result} rows, {name}_duration: {time}, prefix: {prefix}".format(
                name=name, result=result, time=end - start, prefix=prefix))
# IN PARENT batches ON DELETE CASCADE) with statsd.timer("syncstorage.purge_ttl.batches_duration"): batches_start = datetime.now() query = 'DELETE FROM batches WHERE expiry < CURRENT_TIMESTAMP()' result = database.execute_partitioned_dml(query) batches_end = datetime.now() logging.info("batches: removed {} rows, batches_duration: {}".format( result, batches_end - batches_start)) # Delete BSOs with statsd.timer("syncstorage.purge_ttl.bso_duration"): bso_start = datetime.now() query = 'DELETE FROM bsos WHERE expiry < CURRENT_TIMESTAMP()' result = database.execute_partitioned_dml(query) bso_end = datetime.now() logging.info("bso: removed {} rows, bso_duration: {}".format( result, bso_end - bso_start)) if __name__ == "__main__": with statsd.timer("syncstorage.purge_ttl.total_duration"): start_time = datetime.now() logging.info('Starting purge_ttl.py') spanner_read_data() end_time = datetime.now() duration = end_time - start_time logging.info( 'Completed purge_ttl.py, total_duration: {}'.format(duration))
def process_request(self, request):
    """Record which view will handle *request* and start a per-view timer.

    Stores the resolved view name on the middleware and starts a statsd
    timer (presumably stopped in a later middleware hook such as
    process_response — verify).
    """
    view_name = resolve(request.path).url_name
    self.current_view = view_name
    # NOTE(review): this counter is named "view.exceptions.*" yet is bumped
    # on every request, not only when an exception occurs — confirm the
    # metric name is intentional.
    statsd.incr("view.exceptions.{0}".format(view_name))
    timer = statsd.timer('view.{0}'.format(view_name))
    timer.start()
    self.timer = timer