def update(dataset, submission_date, limit=None):
    """Update the published v4 files of a dataset for one submission date.

    The bucket and prefix of the dataset are looked up in the
    ``sources.json`` key of the metadata bucket.  Daily stats are read
    before and after the update and their difference is printed, followed
    by the AWS lambda stats for the same date.

    Args:
        dataset: Either "telemetry" or "telemetry-release".
        submission_date: Date to process; it is used as both ends of the
            stats range (format is whatever SDB expects -- not visible
            here).
        limit: Optional limit forwarded to update_published_v4_files;
            converted with int() when truthy.

    Raises:
        ValueError: If the dataset is not one of the supported names.
    """
    limit = int(limit) if limit else limit

    if dataset not in ("telemetry", "telemetry-release"):
        raise ValueError("Unsupported dataset")

    s3 = boto.connect_s3(host="s3-us-west-2.amazonaws.com")
    metadata = s3.get_bucket(METADATA_BUCKET, validate=False)
    raw_sources = metadata.get_key("sources.json").get_contents_as_string()
    source = json.loads(raw_sources)[dataset]
    bucket = source["bucket"]
    prefix = source["prefix"]

    # Backwards compatibility: the "telemetry-2" prefix keeps using the
    # "telemetry_v4" SDB name instead of the prefix itself.
    sdb_name = "telemetry_v4" if prefix == "telemetry-2" else prefix
    sdb = SDB(sdb_name, read_only=False)

    before = sdb.get_daily_stats(submission_date, submission_date)
    update_published_v4_files(
        sdb, bucket, prefix, submission_date=submission_date, limit=limit)
    after = sdb.get_daily_stats(submission_date, submission_date)

    print("Filter service stats:")
    print("Note that the following numbers are correct only if there isn't "
          "another entity concurrently pushing new submissions:")
    sdb.diff_stats(before, after)

    print("AWS lambda stats:")
    sdb.print_lambda_stats(submission_date, submission_date)
def update(dataset, submission_date, limit=None):
    """Index newly published v4 files for a dataset and report the stats.

    Reads ``sources.json`` from the metadata bucket to find the bucket and
    prefix of ``dataset``, runs update_published_v4_files for
    ``submission_date`` and prints how the daily stats changed, plus the
    AWS lambda stats for that date.

    Args:
        dataset: Name of the dataset; only "telemetry" and
            "telemetry-release" are accepted.
        submission_date: The single date to update; passed as both the
            start and the end of the stats queries.
        limit: Optional limit handed to update_published_v4_files.  A
            truthy value is converted with int().

    Raises:
        ValueError: If ``dataset`` is not supported.
    """
    if limit:
        limit = int(limit)

    supported = ("telemetry", "telemetry-release")
    if dataset not in supported:
        raise ValueError("Unsupported dataset")

    connection = boto.connect_s3(host=S3_DEFAULT_ENDPOINT)
    meta_bucket = connection.get_bucket(METADATA_BUCKET, validate=False)
    sources_key = meta_bucket.get_key("sources.json")
    sources = json.loads(sources_key.get_contents_as_string())

    dataset_source = sources[dataset]
    bucket, prefix = dataset_source["bucket"], dataset_source["prefix"]

    # Backwards compatibility: "telemetry-2" maps to the "telemetry_v4"
    # SDB name; every other prefix is used unchanged.
    sdb_name = prefix
    if prefix == "telemetry-2":
        sdb_name = "telemetry_v4"
    sdb = SDB(sdb_name, read_only=False)

    date_range = (submission_date, submission_date)
    stats_before = sdb.get_daily_stats(*date_range)
    update_published_v4_files(sdb, bucket, prefix,
                              submission_date=submission_date,
                              limit=limit)
    stats_after = sdb.get_daily_stats(*date_range)

    print("Filter service stats:")
    print("Note that the following numbers are correct only if there isn't another entity concurrently pushing new submissions:")
    sdb.diff_stats(stats_before, stats_after)
    print("AWS lambda stats:")
    sdb.print_lambda_stats(*date_range)
def main(limit=None, schema_version=None, from_date=None, to_date=None):
    """Update the published files for a schema version and print stats.

    When ``from_date`` is given, daily stats are read before and after the
    update and their difference is printed together with the AWS lambda
    stats for the same range.  Without ``from_date`` no stats are printed.

    Args:
        limit: Optional limit forwarded to the update function; converted
            with int() when truthy.
        schema_version: Either "v2" or "v4"; selects both the SDB name
            ("telemetry_v2" / "telemetry_v4") and the update function.
        from_date: Optional start of the submission date range.
        to_date: Optional end of the submission date range.  Defaults to
            today's date formatted as YYYYMMDD when ``from_date`` is given
            without it.

    Raises:
        ValueError: If ``schema_version`` is neither "v2" nor "v4".
    """
    if from_date and not to_date:
        # An open-ended range runs up to the current (local) day.
        to_date = datetime.now().strftime("%Y%m%d")

    if limit:
        limit = int(limit)

    if schema_version not in ("v2", "v4"):
        raise ValueError("Unsupported schema version")

    is_v2 = schema_version == "v2"
    sdb = SDB("telemetry_v2" if is_v2 else "telemetry_v4", read_only=False)

    # Stats are only collected when a date range was requested.
    if from_date:
        prev = sdb.get_daily_stats(from_date, to_date)

    update_files = update_published_v2_files if is_v2 else update_published_v4_files
    update_files(sdb,
                 from_submission_date=from_date,
                 to_submission_date=to_date,
                 limit=limit)

    if from_date:
        curr = sdb.get_daily_stats(from_date, to_date)
        # Single-argument print(...) behaves the same with the Python 2
        # print statement and the Python 3 print function.
        print("")
        print("Filter service stats:")
        print("Note that the following numbers are correct only if there isn't "
              "another entity concurrently pushing new submissions:")
        sdb.diff_stats(prev, curr)
        print("AWS lambda stats:")
        sdb.print_lambda_stats(from_date, to_date)